LCOV - differential code coverage report
Current view: top level - src/backend/utils/adt - varlena.c (source / functions) Coverage Total Hit UNC LBC UIC UBC GBC GIC GNC CBC EUB ECB DUB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 89.8 % 2196 1971 18 58 144 5 62 1240 112 557 152 1232 6 124
Current Date: 2023-04-08 17:13:01 Functions: 92.0 % 163 150 13 144 6 13 140 10
Baseline: 15 Line coverage date bins:
Baseline Date: 2023-04-08 15:09:40 [..60] days: 62.1 % 29 18 11 17 1
Legend: Lines: hit not hit (60,120] days: 93.2 % 73 68 5 68
(120,180] days: 100.0 % 1 1 1
(180,240] days: 90.0 % 30 27 2 1 2 25
(240..) days: 90.0 % 2063 1857 58 143 5 62 1237 2 556 152 1232
Function coverage date bins:
(60,120] days: 100.0 % 3 3 3
(180,240] days: 100.0 % 3 3 3
(240..) days: 46.5 % 310 144 13 144 13 140

 Age         Owner                  TLA  Line data    Source code
                                  1                 : /*-------------------------------------------------------------------------
                                  2                 :  *
                                  3                 :  * varlena.c
                                  4                 :  *    Functions for the variable-length built-in types.
                                  5                 :  *
                                  6                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
                                  7                 :  * Portions Copyright (c) 1994, Regents of the University of California
                                  8                 :  *
                                  9                 :  *
                                 10                 :  * IDENTIFICATION
                                 11                 :  *    src/backend/utils/adt/varlena.c
                                 12                 :  *
                                 13                 :  *-------------------------------------------------------------------------
                                 14                 :  */
                                 15                 : #include "postgres.h"
                                 16                 : 
                                 17                 : #include <ctype.h>
                                 18                 : #include <limits.h>
                                 19                 : 
                                 20                 : #include "access/detoast.h"
                                 21                 : #include "access/toast_compression.h"
                                 22                 : #include "catalog/pg_collation.h"
                                 23                 : #include "catalog/pg_type.h"
                                 24                 : #include "common/hashfn.h"
                                 25                 : #include "common/int.h"
                                 26                 : #include "common/unicode_norm.h"
                                 27                 : #include "funcapi.h"
                                 28                 : #include "lib/hyperloglog.h"
                                 29                 : #include "libpq/pqformat.h"
                                 30                 : #include "miscadmin.h"
                                 31                 : #include "nodes/execnodes.h"
                                 32                 : #include "parser/scansup.h"
                                 33                 : #include "port/pg_bswap.h"
                                 34                 : #include "regex/regex.h"
                                 35                 : #include "utils/builtins.h"
                                 36                 : #include "utils/bytea.h"
                                 37                 : #include "utils/guc.h"
                                 38                 : #include "utils/lsyscache.h"
                                 39                 : #include "utils/memutils.h"
                                 40                 : #include "utils/pg_locale.h"
                                 41                 : #include "utils/sortsupport.h"
                                 42                 : #include "utils/varlena.h"
                                 43                 : 
                                 44                 : 
                                 45                 : /* GUC variable */
                                 46                 : int         bytea_output = BYTEA_OUTPUT_HEX;
                                 47                 : 
                                 48                 : typedef struct varlena VarString;
                                 49                 : 
                                 50                 : /*
                                 51                 :  * State for text_position_* functions.
                                 52                 :  */
                                 53                 : typedef struct
                                 54                 : {
                                 55                 :     bool        is_multibyte_char_in_char;  /* need to check char boundaries? */
                                 56                 : 
                                 57                 :     char       *str1;           /* haystack string */
                                 58                 :     char       *str2;           /* needle string */
                                 59                 :     int         len1;           /* string lengths in bytes */
                                 60                 :     int         len2;
                                 61                 : 
                                 62                 :     /* Skip table for Boyer-Moore-Horspool search algorithm: */
                                 63                 :     int         skiptablemask;  /* mask for ANDing with skiptable subscripts */
                                 64                 :     int         skiptable[256]; /* skip distance for given mismatched char */
                                 65                 : 
                                 66                 :     char       *last_match;     /* pointer to last match in 'str1' */
                                 67                 : 
                                 68                 :     /*
                                 69                 :      * Sometimes we need to convert the byte position of a match to a
                                 70                 :      * character position.  These store the last position that was converted,
                                 71                 :      * so that on the next call, we can continue from that point, rather than
                                 72                 :      * count characters from the very beginning.
                                 73                 :      */
                                 74                 :     char       *refpoint;       /* pointer within original haystack string */
                                 75                 :     int         refpos;         /* 0-based character offset of the same point */
                                 76                 : } TextPositionState;
                                 77                 : 
                                 78                 : typedef struct
                                 79                 : {
                                 80                 :     char       *buf1;           /* 1st string, or abbreviation original string
                                 81                 :                                  * buf */
                                 82                 :     char       *buf2;           /* 2nd string, or abbreviation strxfrm() buf */
                                 83                 :     int         buflen1;        /* Allocated length of buf1 */
                                 84                 :     int         buflen2;        /* Allocated length of buf2 */
                                 85                 :     int         last_len1;      /* Length of last buf1 string/strxfrm() input */
                                 86                 :     int         last_len2;      /* Length of last buf2 string/strxfrm() blob */
                                 87                 :     int         last_returned;  /* Last comparison result (cache) */
                                 88                 :     bool        cache_blob;     /* Does buf2 contain strxfrm() blob, etc? */
                                 89                 :     bool        collate_c;
                                 90                 :     Oid         typid;          /* Actual datatype (text/bpchar/bytea/name) */
                                 91                 :     hyperLogLogState abbr_card; /* Abbreviated key cardinality state */
                                 92                 :     hyperLogLogState full_card; /* Full key cardinality state */
                                 93                 :     double      prop_card;      /* Required cardinality proportion */
                                 94                 :     pg_locale_t locale;
                                 95                 : } VarStringSortSupport;
                                 96                 : 
                                 97                 : /*
                                 98                 :  * Output data for split_text(): we output either to an array or a table.
                                 99                 :  * tupstore and tupdesc must be set up in advance to output to a table.
                                100                 :  */
                                101                 : typedef struct
                                102                 : {
                                103                 :     ArrayBuildState *astate;
                                104                 :     Tuplestorestate *tupstore;
                                105                 :     TupleDesc   tupdesc;
                                106                 : } SplitTextOutputData;
                                107                 : 
                                108                 : /*
                                109                 :  * This should be large enough that most strings will fit, but small enough
                                110                 :  * that we feel comfortable putting it on the stack
                                111                 :  */
                                112                 : #define TEXTBUFLEN      1024
                                113                 : 
                                114                 : #define DatumGetVarStringP(X)       ((VarString *) PG_DETOAST_DATUM(X))
                                115                 : #define DatumGetVarStringPP(X)      ((VarString *) PG_DETOAST_DATUM_PACKED(X))
                                116                 : 
                                117                 : static int  varstrfastcmp_c(Datum x, Datum y, SortSupport ssup);
                                118                 : static int  bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup);
                                119                 : static int  namefastcmp_c(Datum x, Datum y, SortSupport ssup);
                                120                 : static int  varlenafastcmp_locale(Datum x, Datum y, SortSupport ssup);
                                121                 : static int  namefastcmp_locale(Datum x, Datum y, SortSupport ssup);
                                122                 : static int  varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup);
                                123                 : static Datum varstr_abbrev_convert(Datum original, SortSupport ssup);
                                124                 : static bool varstr_abbrev_abort(int memtupcount, SortSupport ssup);
                                125                 : static int32 text_length(Datum str);
                                126                 : static text *text_catenate(text *t1, text *t2);
                                127                 : static text *text_substring(Datum str,
                                128                 :                             int32 start,
                                129                 :                             int32 length,
                                130                 :                             bool length_not_specified);
                                131                 : static text *text_overlay(text *t1, text *t2, int sp, int sl);
                                132                 : static int  text_position(text *t1, text *t2, Oid collid);
                                133                 : static void text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state);
                                134                 : static bool text_position_next(TextPositionState *state);
                                135                 : static char *text_position_next_internal(char *start_ptr, TextPositionState *state);
                                136                 : static char *text_position_get_match_ptr(TextPositionState *state);
                                137                 : static int  text_position_get_match_pos(TextPositionState *state);
                                138                 : static void text_position_cleanup(TextPositionState *state);
                                139                 : static void check_collation_set(Oid collid);
                                140                 : static int  text_cmp(text *arg1, text *arg2, Oid collid);
                                141                 : static bytea *bytea_catenate(bytea *t1, bytea *t2);
                                142                 : static bytea *bytea_substring(Datum str,
                                143                 :                               int S,
                                144                 :                               int L,
                                145                 :                               bool length_not_specified);
                                146                 : static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
                                147                 : static void appendStringInfoText(StringInfo str, const text *t);
                                148                 : static bool split_text(FunctionCallInfo fcinfo, SplitTextOutputData *tstate);
                                149                 : static void split_text_accum_result(SplitTextOutputData *tstate,
                                150                 :                                     text *field_value,
                                151                 :                                     text *null_string,
                                152                 :                                     Oid collation);
                                153                 : static text *array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
                                154                 :                                     const char *fldsep, const char *null_string);
                                155                 : static StringInfo makeStringAggState(FunctionCallInfo fcinfo);
                                156                 : static bool text_format_parse_digits(const char **ptr, const char *end_ptr,
                                157                 :                                      int *value);
                                158                 : static const char *text_format_parse_format(const char *start_ptr,
                                159                 :                                             const char *end_ptr,
                                160                 :                                             int *argpos, int *widthpos,
                                161                 :                                             int *flags, int *width);
                                162                 : static void text_format_string_conversion(StringInfo buf, char conversion,
                                163                 :                                           FmgrInfo *typOutputInfo,
                                164                 :                                           Datum value, bool isNull,
                                165                 :                                           int flags, int width);
                                166                 : static void text_format_append_string(StringInfo buf, const char *str,
                                167                 :                                       int flags, int width);
                                168                 : 
                                169                 : 
                                170                 : /*****************************************************************************
                                171                 :  *   CONVERSION ROUTINES EXPORTED FOR USE BY C CODE                          *
                                172                 :  *****************************************************************************/
                                173                 : 
                                174                 : /*
                                175                 :  * cstring_to_text
 5493 tgl                       176 ECB             :  *
                                177                 :  * Create a text value from a null-terminated C string.
                                178                 :  *
                                179                 :  * The new text value is freshly palloc'd with a full-size VARHDR.
                                180                 :  */
                                181                 : text *
 5493 tgl                       182 GIC    12430865 : cstring_to_text(const char *s)
                                183                 : {
                                184        12430865 :     return cstring_to_text_with_len(s, strlen(s));
                                185                 : }
                                186                 : 
                                187                 : /*
 5493 tgl                       188 ECB             :  * cstring_to_text_with_len
                                189                 :  *
                                190                 :  * Same as cstring_to_text except the caller specifies the string length;
                                191                 :  * the string need not be null-terminated.
                                192                 :  */
                                193                 : text *
 5493 tgl                       194 GIC    14970995 : cstring_to_text_with_len(const char *s, int len)
 5493 tgl                       195 ECB             : {
 5493 tgl                       196 GIC    14970995 :     text       *result = (text *) palloc(len + VARHDRSZ);
                                197                 : 
                                198        14970995 :     SET_VARSIZE(result, len + VARHDRSZ);
                                199        14970995 :     memcpy(VARDATA(result), s, len);
                                200                 : 
                                201        14970995 :     return result;
                                202                 : }
                                203                 : 
                                204                 : /*
                                205                 :  * text_to_cstring
                                206                 :  *
                                207                 :  * Create a palloc'd, null-terminated C string from a text value.
                                208                 :  *
 5493 tgl                       209 ECB             :  * We support being passed a compressed or toasted text value.
                                210                 :  * This is a bit bogus since such values shouldn't really be referred to as
                                211                 :  * "text *", but it seems useful for robustness.  If we didn't handle that
                                212                 :  * case here, we'd need another routine that did, anyway.
                                213                 :  */
                                214                 : char *
 5493 tgl                       215 GIC     8999458 : text_to_cstring(const text *t)
 5493 tgl                       216 ECB             : {
                                217                 :     /* must cast away the const, unfortunately */
 1627 peter_e                   218 CBC     8999458 :     text       *tunpacked = pg_detoast_datum_packed(unconstify(text *, t));
 5493 tgl                       219 GIC     8999458 :     int         len = VARSIZE_ANY_EXHDR(tunpacked);
 5493 tgl                       220 ECB             :     char       *result;
                                221                 : 
 5493 tgl                       222 GIC     8999458 :     result = (char *) palloc(len + 1);
 5493 tgl                       223 CBC     8999458 :     memcpy(result, VARDATA_ANY(tunpacked), len);
 5493 tgl                       224 GIC     8999458 :     result[len] = '\0';
                                225                 : 
                                226         8999458 :     if (tunpacked != t)
                                227           64653 :         pfree(tunpacked);
                                228                 : 
                                229         8999458 :     return result;
                                230                 : }
                                231                 : 
                                232                 : /*
                                233                 :  * text_to_cstring_buffer
                                234                 :  *
                                235                 :  * Copy a text value into a caller-supplied buffer of size dst_len.
                                236                 :  *
                                237                 :  * The text string is truncated if necessary to fit.  The result is
                                238                 :  * guaranteed null-terminated (unless dst_len == 0).
                                239                 :  *
 5493 tgl                       240 ECB             :  * We support being passed a compressed or toasted text value.
                                241                 :  * This is a bit bogus since such values shouldn't really be referred to as
                                242                 :  * "text *", but it seems useful for robustness.  If we didn't handle that
                                243                 :  * case here, we'd need another routine that did, anyway.
                                244                 :  */
                                245                 : void
 5493 tgl                       246 CBC         320 : text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
                                247                 : {
 5493 tgl                       248 ECB             :     /* must cast away the const, unfortunately */
 1627 peter_e                   249 CBC         320 :     text       *srcunpacked = pg_detoast_datum_packed(unconstify(text *, src));
 5493 tgl                       250             320 :     size_t      src_len = VARSIZE_ANY_EXHDR(srcunpacked);
                                251                 : 
 5493 tgl                       252 GBC         320 :     if (dst_len > 0)
 5493 tgl                       253 ECB             :     {
 5493 tgl                       254 CBC         320 :         dst_len--;
 5493 tgl                       255 GIC         320 :         if (dst_len >= src_len)
                                256             320 :             dst_len = src_len;
 2118 tgl                       257 ECB             :         else                    /* ensure truncation is encoding-safe */
 5493 tgl                       258 UBC           0 :             dst_len = pg_mbcliplen(VARDATA_ANY(srcunpacked), src_len, dst_len);
 5493 tgl                       259 CBC         320 :         memcpy(dst, VARDATA_ANY(srcunpacked), dst_len);
 5493 tgl                       260 GIC         320 :         dst[dst_len] = '\0';
                                261                 :     }
                                262                 : 
                                263             320 :     if (srcunpacked != src)
 5493 tgl                       264 UIC           0 :         pfree(srcunpacked);
 5493 tgl                       265 GIC         320 : }
                                266                 : 
                                267                 : 
                                268                 : /*****************************************************************************
                                269                 :  *   USER I/O ROUTINES                                                       *
                                270                 :  *****************************************************************************/
                                271                 : 
                                272                 : 
                                273                 : #define VAL(CH)         ((CH) - '0')
                                274                 : #define DIG(VAL)        ((VAL) + '0')
                                275                 : 
                                276                 : /*
                                277                 :  *      byteain         - converts from printable representation of byte array
                                278                 :  *
                                279                 :  *      Non-printable characters must be passed as '\nnn' (octal) and are
                                280                 :  *      converted to internal form.  '\' must be passed as '\\'.
                                281                 :  *      ereport(ERROR, ...) if bad form.
 9770 scrappy                   282 ECB             :  *
                                283                 :  *      BUGS:
 6251 neilc                     284                 :  *              The input is scanned twice.
 9345 bruce                     285                 :  *              The error checking of input is minimal.
                                286                 :  */
                                287                 : Datum
 8289 tgl                       288 GIC      131417 : byteain(PG_FUNCTION_ARGS)
                                289                 : {
                                290          131417 :     char       *inputText = PG_GETARG_CSTRING(0);
  116 tgl                       291 GNC      131417 :     Node       *escontext = fcinfo->context;
                                292                 :     char       *tp;
 9229 lockhart                  293 ECB             :     char       *rp;
                                294                 :     int         bc;
 8416 tgl                       295                 :     bytea      *result;
                                296                 : 
 4996                           297                 :     /* Recognize hex input */
 4996 tgl                       298 CBC      131417 :     if (inputText[0] == '\\' && inputText[1] == 'x')
 4996 tgl                       299 ECB             :     {
 4790 bruce                     300 GIC         405 :         size_t      len = strlen(inputText);
 4996 tgl                       301 ECB             : 
  598 michael                   302 GIC         405 :         bc = (len - 2) / 2 + VARHDRSZ;  /* maximum possible length */
 4996 tgl                       303 CBC         405 :         result = palloc(bc);
  116 tgl                       304 GNC         405 :         bc = hex_decode_safe(inputText + 2, len - 2, VARDATA(result),
                                305                 :                              escontext);
 2118 tgl                       306 GIC         399 :         SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
                                307                 : 
 4996 tgl                       308 CBC         399 :         PG_RETURN_BYTEA_P(result);
                                309                 :     }
 4996 tgl                       310 ECB             : 
                                311                 :     /* Else, it's the traditional escaped style */
 4996 tgl                       312 CBC     2176934 :     for (bc = 0, tp = inputText; *tp != '\0'; bc++)
 8289 tgl                       313 ECB             :     {
 7877 bruce                     314 CBC     2045928 :         if (tp[0] != '\\')
                                315         2045424 :             tp++;
 7836                           316             504 :         else if ((tp[0] == '\\') &&
                                317             504 :                  (tp[1] >= '0' && tp[1] <= '3') &&
                                318             498 :                  (tp[2] >= '0' && tp[2] <= '7') &&
 7836 bruce                     319 GBC         498 :                  (tp[3] >= '0' && tp[3] <= '7'))
 7877 bruce                     320 GIC         498 :             tp += 4;
 7836                           321               6 :         else if ((tp[0] == '\\') &&
                                322               6 :                  (tp[1] == '\\'))
 7877 bruce                     323 UIC           0 :             tp += 2;
                                324                 :         else
 7877 bruce                     325 ECB             :         {
                                326                 :             /*
                                327                 :              * one backslash, not followed by another or ### valid octal
                                328                 :              */
  116 tgl                       329 GNC           6 :             ereturn(escontext, (Datum) 0,
                                330                 :                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 2272 alvherre                  331 ECB             :                      errmsg("invalid input syntax for type %s", "bytea")));
                                332                 :         }
 8289 tgl                       333                 :     }
                                334                 : 
 4996 tgl                       335 GIC      131006 :     bc += VARHDRSZ;
 5050 bruce                     336 ECB             : 
 4996 tgl                       337 CBC      131006 :     result = (bytea *) palloc(bc);
                                338          131006 :     SET_VARSIZE(result, bc);
                                339                 : 
 8289                           340          131006 :     tp = inputText;
 7275                           341          131006 :     rp = VARDATA(result);
 9345 bruce                     342         2176913 :     while (*tp != '\0')
 8289 tgl                       343 ECB             :     {
 7877 bruce                     344 CBC     2045907 :         if (tp[0] != '\\')
 9345                           345         2045409 :             *rp++ = *tp++;
 7836 bruce                     346 GIC         498 :         else if ((tp[0] == '\\') &&
 7836 bruce                     347 CBC         498 :                  (tp[1] >= '0' && tp[1] <= '3') &&
                                348             498 :                  (tp[2] >= '0' && tp[2] <= '7') &&
                                349             498 :                  (tp[3] >= '0' && tp[3] <= '7'))
 9345 bruce                     350 ECB             :         {
 4996 tgl                       351 CBC         498 :             bc = VAL(tp[1]);
 4996 tgl                       352 GIC         498 :             bc <<= 3;
 4996 tgl                       353 CBC         498 :             bc += VAL(tp[2]);
 4996 tgl                       354 GIC         498 :             bc <<= 3;
 4996 tgl                       355 GBC         498 :             *rp++ = bc + VAL(tp[3]);
 5050 bruce                     356 EUB             : 
 7877 bruce                     357 GIC         498 :             tp += 4;
 7877 bruce                     358 EUB             :         }
 7836 bruce                     359 UBC           0 :         else if ((tp[0] == '\\') &&
 7836 bruce                     360 UIC           0 :                  (tp[1] == '\\'))
                                361                 :         {
 7877                           362               0 :             *rp++ = '\\';
                                363               0 :             tp += 2;
                                364                 :         }
                                365                 :         else
 7877 bruce                     366 EUB             :         {
                                367                 :             /*
                                368                 :              * We should never get here. The first pass should not allow it.
                                369                 :              */
  116 tgl                       370 UNC           0 :             ereturn(escontext, (Datum) 0,
                                371                 :                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 2272 alvherre                  372 ECB             :                      errmsg("invalid input syntax for type %s", "bytea")));
                                373                 :         }
                                374                 :     }
                                375                 : 
 8289 tgl                       376 GIC      131006 :     PG_RETURN_BYTEA_P(result);
                                377                 : }
                                378                 : 
                                379                 : /*
                                380                 :  *      byteaout        - converts to printable representation of byte array
                                381                 :  *
 4996 tgl                       382 ECB             :  *      In the traditional escaped format, non-printable characters are
                                383                 :  *      printed as '\nnn' (octal) and '\' as '\\'.
 9770 scrappy                   384                 :  */
                                385                 : Datum
 8289 tgl                       386 GIC        6677 : byteaout(PG_FUNCTION_ARGS)
                                387                 : {
 5847 tgl                       388 CBC        6677 :     bytea      *vlena = PG_GETARG_BYTEA_PP(0);
                                389                 :     char       *result;
                                390                 :     char       *rp;
 4996 tgl                       391 ECB             : 
 4996 tgl                       392 CBC        6677 :     if (bytea_output == BYTEA_OUTPUT_HEX)
 4996 tgl                       393 ECB             :     {
                                394                 :         /* Print hex format */
  598 michael                   395 GIC        6486 :         rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
 4996 tgl                       396 CBC        6486 :         *rp++ = '\\';
 4996 tgl                       397 GIC        6486 :         *rp++ = 'x';
  598 michael                   398            6486 :         rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
                                399                 :     }
 4996 tgl                       400             191 :     else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
                                401                 :     {
                                402                 :         /* Print traditional escaped format */
 4790 bruce                     403 ECB             :         char       *vp;
 1097 tgl                       404                 :         uint64      len;
 4790 bruce                     405                 :         int         i;
                                406                 : 
 4790 bruce                     407 CBC         191 :         len = 1;                /* empty string has 1 char */
 4790 bruce                     408 GBC         191 :         vp = VARDATA_ANY(vlena);
 4790 bruce                     409 CBC      108825 :         for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
 9345 bruce                     410 ECB             :         {
 4790 bruce                     411 GIC      108634 :             if (*vp == '\\')
 4790 bruce                     412 LBC           0 :                 len += 2;
 4790 bruce                     413 GIC      108634 :             else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
                                414             246 :                 len += 4;
                                415                 :             else
                                416          108388 :                 len++;
                                417                 :         }
                                418                 : 
                                419                 :         /*
 1097 tgl                       420 ECB             :          * In principle len can't overflow uint32 if the input fit in 1GB, but
 1097 tgl                       421 EUB             :          * for safety let's check rather than relying on palloc's internal
                                422                 :          * check.
                                423                 :          */
 1097 tgl                       424 CBC         191 :         if (len > MaxAllocSize)
 1097 tgl                       425 UIC           0 :             ereport(ERROR,
 1097 tgl                       426 ECB             :                     (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                                427                 :                      errmsg_internal("result of bytea output conversion is too large")));
 4790 bruce                     428 GIC         191 :         rp = result = (char *) palloc(len);
 1097 tgl                       429 ECB             : 
 4790 bruce                     430 GIC         191 :         vp = VARDATA_ANY(vlena);
 4790 bruce                     431 GBC      108825 :         for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
 9345 bruce                     432 EUB             :         {
 4790 bruce                     433 GIC      108634 :             if (*vp == '\\')
 4790 bruce                     434 ECB             :             {
 4790 bruce                     435 LBC           0 :                 *rp++ = '\\';
 4790 bruce                     436 UIC           0 :                 *rp++ = '\\';
                                437                 :             }
 4790 bruce                     438 CBC      108634 :             else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
                                439             246 :             {
 4790 bruce                     440 ECB             :                 int         val;    /* holds unprintable chars */
                                441                 : 
 4790 bruce                     442 CBC         246 :                 val = *vp;
                                443             246 :                 rp[0] = '\\';
                                444             246 :                 rp[3] = DIG(val & 07);
                                445             246 :                 val >>= 3;
 4790 bruce                     446 GIC         246 :                 rp[2] = DIG(val & 07);
                                447             246 :                 val >>= 3;
 4790 bruce                     448 CBC         246 :                 rp[1] = DIG(val & 03);
 4790 bruce                     449 GIC         246 :                 rp += 4;
                                450                 :             }
                                451                 :             else
                                452          108388 :                 *rp++ = *vp;
 9345 bruce                     453 EUB             :         }
                                454                 :     }
                                455                 :     else
                                456                 :     {
 4996 tgl                       457 LBC           0 :         elog(ERROR, "unrecognized bytea_output setting: %d",
 4996 tgl                       458 ECB             :              bytea_output);
                                459                 :         rp = result = NULL;     /* keep compiler quiet */
                                460                 :     }
 9345 bruce                     461 GIC        6677 :     *rp = '\0';
 8289 tgl                       462            6677 :     PG_RETURN_CSTRING(result);
                                463                 : }
                                464                 : 
 7275 tgl                       465 ECB             : /*
                                466                 :  *      bytearecv           - converts external binary format to bytea
                                467                 :  */
                                468                 : Datum
 7275 tgl                       469 GIC         506 : bytearecv(PG_FUNCTION_ARGS)
                                470                 : {
 7275 tgl                       471 CBC         506 :     StringInfo  buf = (StringInfo) PG_GETARG_POINTER(0);
 7275 tgl                       472 ECB             :     bytea      *result;
                                473                 :     int         nbytes;
                                474                 : 
 7275 tgl                       475 CBC         506 :     nbytes = buf->len - buf->cursor;
 7275 tgl                       476 GIC         506 :     result = (bytea *) palloc(nbytes + VARHDRSZ);
 5885                           477             506 :     SET_VARSIZE(result, nbytes + VARHDRSZ);
 7275                           478             506 :     pq_copymsgbytes(buf, VARDATA(result), nbytes);
                                479             506 :     PG_RETURN_BYTEA_P(result);
                                480                 : }
                                481                 : 
                                482                 : /*
                                483                 :  *      byteasend           - converts bytea to binary format
 7275 tgl                       484 ECB             :  *
                                485                 :  * This is a special case: just copy the input...
                                486                 :  */
                                487                 : Datum
 7275 tgl                       488 CBC        2811 : byteasend(PG_FUNCTION_ARGS)
                                489                 : {
 7275 tgl                       490 GIC        2811 :     bytea      *vlena = PG_GETARG_BYTEA_P_COPY(0);
                                491                 : 
 7275 tgl                       492 CBC        2811 :     PG_RETURN_BYTEA_P(vlena);
                                493                 : }
                                494                 : 
                                495                 : Datum
 4013 peter_e                   496           46387 : bytea_string_agg_transfn(PG_FUNCTION_ARGS)
                                497                 : {
                                498                 :     StringInfo  state;
 4125 rhaas                     499 ECB             : 
 4125 rhaas                     500 GIC       46387 :     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
 4125 rhaas                     501 ECB             : 
                                502                 :     /* Append the value unless null, preceding it with the delimiter. */
 4125 rhaas                     503 GIC       46387 :     if (!PG_ARGISNULL(1))
                                504                 :     {
                                505           38887 :         bytea      *value = PG_GETARG_BYTEA_PP(1);
   76 drowley                   506 GNC       38887 :         bool        isfirst = false;
                                507                 : 
                                508                 :         /*
                                509                 :          * You might think we can just throw away the first delimiter, however
                                510                 :          * we must keep it as we may be a parallel worker doing partial
                                511                 :          * aggregation building a state to send to the main process.  We need
                                512                 :          * to keep the delimiter of every aggregation so that the combine
                                513                 :          * function can properly join up the strings of two separately
                                514                 :          * partially aggregated results.  The first delimiter is only stripped
                                515                 :          * off in the final function.  To know how much to strip off the front
                                516                 :          * of the string, we store the length of the first delimiter in the
                                517                 :          * StringInfo's cursor field, which we don't otherwise need here.
                                518                 :          */
 4125 rhaas                     519 GIC       38887 :         if (state == NULL)
                                520                 :         {
                                521              93 :             state = makeStringAggState(fcinfo);
   76 drowley                   522 GNC          93 :             isfirst = true;
                                523                 :         }
                                524                 : 
                                525           38887 :         if (!PG_ARGISNULL(2))
                                526                 :         {
 4013 peter_e                   527 GIC       38881 :             bytea      *delim = PG_GETARG_BYTEA_PP(2);
                                528                 : 
   76 drowley                   529 GNC       38881 :             appendBinaryStringInfo(state, VARDATA_ANY(delim),
                                530           38881 :                                    VARSIZE_ANY_EXHDR(delim));
                                531           38881 :             if (isfirst)
                                532              90 :                 state->cursor = VARSIZE_ANY_EXHDR(delim);
 4013 peter_e                   533 ECB             :         }
                                534                 : 
   76 drowley                   535 GNC       38887 :         appendBinaryStringInfo(state, VARDATA_ANY(value),
                                536           38887 :                                VARSIZE_ANY_EXHDR(value));
 4125 rhaas                     537 ECB             :     }
                                538                 : 
                                539                 :     /*
 4013 peter_e                   540                 :      * The transition type for string_agg() is declared to be "internal",
                                541                 :      * which is a pass-by-value type the same size as a pointer.
 4125 rhaas                     542                 :      */
   76 drowley                   543 GNC       46387 :     if (state)
                                544           46363 :         PG_RETURN_POINTER(state);
                                545              24 :     PG_RETURN_NULL();
 4125 rhaas                     546 ECB             : }
                                547                 : 
                                548                 : Datum
 4013 peter_e                   549 CBC          76 : bytea_string_agg_finalfn(PG_FUNCTION_ARGS)
                                550                 : {
                                551                 :     StringInfo  state;
 4125 rhaas                     552 ECB             : 
                                553                 :     /* cannot be called directly because of internal-type argument */
 4125 rhaas                     554 GIC          76 :     Assert(AggCheckCallContext(fcinfo, NULL));
                                555                 : 
                                556              76 :     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
                                557                 : 
                                558              76 :     if (state != NULL)
                                559                 :     {
                                560                 :         /* As per comment in transfn, strip data before the cursor position */
 4125 rhaas                     561 ECB             :         bytea      *result;
   76 drowley                   562 GNC          73 :         int         strippedlen = state->len - state->cursor;
 4125 rhaas                     563 ECB             : 
   76 drowley                   564 GNC          73 :         result = (bytea *) palloc(strippedlen + VARHDRSZ);
                                565              73 :         SET_VARSIZE(result, strippedlen + VARHDRSZ);
                                566              73 :         memcpy(VARDATA(result), &state->data[state->cursor], strippedlen);
 4125 rhaas                     567 GIC          73 :         PG_RETURN_BYTEA_P(result);
 4125 rhaas                     568 ECB             :     }
                                569                 :     else
 4125 rhaas                     570 GIC           3 :         PG_RETURN_NULL();
                                571                 : }
                                572                 : 
 9770 scrappy                   573 ECB             : /*
                                574                 :  *      textin          - converts "..." to internal representation
                                575                 :  */
                                576                 : Datum
 8313 tgl                       577 CBC     8942398 : textin(PG_FUNCTION_ARGS)
                                578                 : {
 8313 tgl                       579 GIC     8942398 :     char       *inputText = PG_GETARG_CSTRING(0);
                                580                 : 
 5493 tgl                       581 CBC     8942398 :     PG_RETURN_TEXT_P(cstring_to_text(inputText));
                                582                 : }
 9770 scrappy                   583 ECB             : 
                                584                 : /*
 9345 bruce                     585                 :  *      textout         - converts internal representation to "..."
 9770 scrappy                   586                 :  */
                                587                 : Datum
 8313 tgl                       588 GIC     4169602 : textout(PG_FUNCTION_ARGS)
 9770 scrappy                   589 ECB             : {
 5493 tgl                       590 GIC     4169602 :     Datum       txt = PG_GETARG_DATUM(0);
                                591                 : 
                                592         4169602 :     PG_RETURN_CSTRING(TextDatumGetCString(txt));
                                593                 : }
                                594                 : 
                                595                 : /*
 7275 tgl                       596 ECB             :  *      textrecv            - converts external binary format to text
                                597                 :  */
                                598                 : Datum
 7275 tgl                       599 GIC       53360 : textrecv(PG_FUNCTION_ARGS)
 7275 tgl                       600 ECB             : {
 7275 tgl                       601 GIC       53360 :     StringInfo  buf = (StringInfo) PG_GETARG_POINTER(0);
                                602                 :     text       *result;
                                603                 :     char       *str;
                                604                 :     int         nbytes;
                                605                 : 
                                606           53360 :     str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
 6482 tgl                       607 ECB             : 
 5493 tgl                       608 GIC       53360 :     result = cstring_to_text_with_len(str, nbytes);
 7275 tgl                       609 CBC       53360 :     pfree(str);
 7275 tgl                       610 GIC       53360 :     PG_RETURN_TEXT_P(result);
 7275 tgl                       611 ECB             : }
                                612                 : 
                                613                 : /*
                                614                 :  *      textsend            - converts text to binary format
                                615                 :  */
                                616                 : Datum
 7275 tgl                       617 GIC       34115 : textsend(PG_FUNCTION_ARGS)
 7275 tgl                       618 ECB             : {
 5847 tgl                       619 GIC       34115 :     text       *t = PG_GETARG_TEXT_PP(0);
 7275 tgl                       620 ECB             :     StringInfoData buf;
                                621                 : 
 7275 tgl                       622 GIC       34115 :     pq_begintypsend(&buf);
 5847                           623           34115 :     pq_sendtext(&buf, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
 7275                           624           34115 :     PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
 7275 tgl                       625 ECB             : }
                                626                 : 
 9770 scrappy                   627                 : 
 7655 bruce                     628                 : /*
                                629                 :  *      unknownin           - converts "..." to internal representation
                                630                 :  */
                                631                 : Datum
 7655 bruce                     632 UIC           0 : unknownin(PG_FUNCTION_ARGS)
                                633                 : {
 6523 tgl                       634               0 :     char       *str = PG_GETARG_CSTRING(0);
                                635                 : 
 6523 tgl                       636 ECB             :     /* representation is same as cstring */
 6523 tgl                       637 UIC           0 :     PG_RETURN_CSTRING(pstrdup(str));
 7655 bruce                     638 ECB             : }
                                639                 : 
                                640                 : /*
                                641                 :  *      unknownout          - converts internal representation to "..."
                                642                 :  */
                                643                 : Datum
 7655 bruce                     644 GIC         340 : unknownout(PG_FUNCTION_ARGS)
                                645                 : {
                                646                 :     /* representation is same as cstring */
 6523 tgl                       647             340 :     char       *str = PG_GETARG_CSTRING(0);
                                648                 : 
                                649             340 :     PG_RETURN_CSTRING(pstrdup(str));
                                650                 : }
 7655 bruce                     651 EUB             : 
                                652                 : /*
 7275 tgl                       653                 :  *      unknownrecv         - converts external binary format to unknown
                                654                 :  */
                                655                 : Datum
 7275 tgl                       656 UBC           0 : unknownrecv(PG_FUNCTION_ARGS)
                                657                 : {
 7275 tgl                       658 UIC           0 :     StringInfo  buf = (StringInfo) PG_GETARG_POINTER(0);
                                659                 :     char       *str;
                                660                 :     int         nbytes;
                                661                 : 
 6523                           662               0 :     str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
 6523 tgl                       663 ECB             :     /* representation is same as cstring */
 6523 tgl                       664 UIC           0 :     PG_RETURN_CSTRING(str);
                                665                 : }
 7275 tgl                       666 ECB             : 
                                667                 : /*
                                668                 :  *      unknownsend         - converts unknown to binary format
                                669                 :  */
                                670                 : Datum
 7275 tgl                       671 UIC           0 : unknownsend(PG_FUNCTION_ARGS)
                                672                 : {
                                673                 :     /* representation is same as cstring */
 6523                           674               0 :     char       *str = PG_GETARG_CSTRING(0);
 6523 tgl                       675 EUB             :     StringInfoData buf;
                                676                 : 
 6523 tgl                       677 UBC           0 :     pq_begintypsend(&buf);
 6523 tgl                       678 UIC           0 :     pq_sendtext(&buf, str, strlen(str));
                                679               0 :     PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
                                680                 : }
 7275 tgl                       681 EUB             : 
                                682                 : 
 9770 scrappy                   683                 : /* ========== PUBLIC ROUTINES ========== */
                                684                 : 
                                685                 : /*
                                686                 :  * textlen -
                                687                 :  *    returns the logical length of a text*
                                688                 :  *     (which is less than the VARSIZE of the text*)
                                689                 :  */
 8312 tgl                       690                 : Datum
 8312 tgl                       691 GIC      215278 : textlen(PG_FUNCTION_ARGS)
                                692                 : {
 7008 tgl                       693 GBC      215278 :     Datum       str = PG_GETARG_DATUM(0);
                                694                 : 
                                695                 :     /* try to avoid decompressing argument */
                                696          215278 :     PG_RETURN_INT32(text_length(str));
 7535 bruce                     697 EUB             : }
 8053                           698                 : 
                                699                 : /*
                                700                 :  * text_length -
                                701                 :  *  Does the real work for textlen()
                                702                 :  *
                                703                 :  *  This is broken out so it can be called directly by other string processing
                                704                 :  *  functions.  Note that the argument is passed as a Datum, to indicate that
                                705                 :  *  it may still be in compressed form.  We can avoid decompressing it at all
                                706                 :  *  in some cases.
                                707                 :  */
                                708                 : static int32
 7535 bruce                     709 GIC      215284 : text_length(Datum str)
 7535 bruce                     710 ECB             : {
                                711                 :     /* fastpath when max encoding length is one */
 7535 bruce                     712 CBC      215284 :     if (pg_database_encoding_max_length() == 1)
 7535 bruce                     713 GIC          10 :         PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
                                714                 :     else
 7535 bruce                     715 ECB             :     {
 5847 tgl                       716 GIC      215274 :         text       *t = DatumGetTextPP(str);
                                717                 : 
 5624 bruce                     718          215274 :         PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA_ANY(t),
                                719                 :                                              VARSIZE_ANY_EXHDR(t)));
                                720                 :     }
                                721                 : }
                                722                 : 
                                723                 : /*
                                724                 :  * textoctetlen -
                                725                 :  *    returns the physical length of a text*
                                726                 :  *     (which is less than the VARSIZE of the text*)
                                727                 :  */
 8312 tgl                       728 ECB             : Datum
 8312 tgl                       729 GIC          35 : textoctetlen(PG_FUNCTION_ARGS)
                                730                 : {
 7008 tgl                       731 CBC          35 :     Datum       str = PG_GETARG_DATUM(0);
 7008 tgl                       732 ECB             : 
                                733                 :     /* We need not detoast the input at all */
 7008 tgl                       734 GIC          35 :     PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
 8312 tgl                       735 ECB             : }
                                736                 : 
 9770 scrappy                   737                 : /*
                                738                 :  * textcat -
                                739                 :  *    takes two text* and returns a text* that is the concatenation of
                                740                 :  *    the two.
                                741                 :  *
                                742                 :  * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
                                743                 :  * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
                                744                 :  * Allocate space for output in all cases.
                                745                 :  * XXX - thomas 1997-07-10
                                746                 :  */
                                747                 : Datum
 8312 tgl                       748 CBC     1335495 : textcat(PG_FUNCTION_ARGS)
                                749                 : {
 5847                           750         1335495 :     text       *t1 = PG_GETARG_TEXT_PP(0);
 5847 tgl                       751 GIC     1335495 :     text       *t2 = PG_GETARG_TEXT_PP(1);
                                752                 : 
 4822 tgl                       753 CBC     1335495 :     PG_RETURN_TEXT_P(text_catenate(t1, t2));
                                754                 : }
                                755                 : 
                                756                 : /*
                                757                 :  * text_catenate
                                758                 :  *  Guts of textcat(), broken out so it can be used by other functions
                                759                 :  *
                                760                 :  * Arguments can be in short-header form, but not compressed or out-of-line
                                761                 :  */
                                762                 : static text *
 4822 tgl                       763 GIC     1335535 : text_catenate(text *t1, text *t2)
                                764                 : {
                                765                 :     text       *result;
                                766                 :     int         len1,
 9344 bruce                     767 ECB             :                 len2,
                                768                 :                 len;
 8312 tgl                       769                 :     char       *ptr;
 9770 scrappy                   770                 : 
 5847 tgl                       771 GIC     1335535 :     len1 = VARSIZE_ANY_EXHDR(t1);
 4822 tgl                       772 CBC     1335535 :     len2 = VARSIZE_ANY_EXHDR(t2);
                                773                 : 
                                774                 :     /* paranoia ... probably should throw error instead? */
 9345 bruce                     775 GIC     1335535 :     if (len1 < 0)
 9345 bruce                     776 UIC           0 :         len1 = 0;
 9345 bruce                     777 GIC     1335535 :     if (len2 < 0)
 9345 bruce                     778 UIC           0 :         len2 = 0;
                                779                 : 
 9101 lockhart                  780 GIC     1335535 :     len = len1 + len2 + VARHDRSZ;
 8312 tgl                       781         1335535 :     result = (text *) palloc(len);
 9101 lockhart                  782 ECB             : 
                                783                 :     /* Set size of result string... */
 5885 tgl                       784 GIC     1335535 :     SET_VARSIZE(result, len);
                                785                 : 
                                786                 :     /* Fill data field of result string... */
 9345 bruce                     787         1335535 :     ptr = VARDATA(result);
 9101 lockhart                  788         1335535 :     if (len1 > 0)
 5847 tgl                       789         1333907 :         memcpy(ptr, VARDATA_ANY(t1), len1);
 9101 lockhart                  790 CBC     1335535 :     if (len2 > 0)
 5847 tgl                       791         1335430 :         memcpy(ptr + len1, VARDATA_ANY(t2), len2);
                                792                 : 
 4822 tgl                       793 GIC     1335535 :     return result;
 8312 tgl                       794 ECB             : }
 9229 lockhart                  795 EUB             : 
 5996 tgl                       796 ECB             : /*
 5996 tgl                       797 EUB             :  * charlen_to_bytelen()
                                798                 :  *  Compute the number of bytes occupied by n characters starting at *p
 5996 tgl                       799 ECB             :  *
                                800                 :  * It is caller's responsibility that there actually are n characters;
                                801                 :  * the string need not be null-terminated.
                                802                 :  */
                                803                 : static int
 5996 tgl                       804 GIC        5457 : charlen_to_bytelen(const char *p, int n)
                                805                 : {
 5996 tgl                       806 CBC        5457 :     if (pg_database_encoding_max_length() == 1)
 5996 tgl                       807 ECB             :     {
                                808                 :         /* Optimization for single-byte encodings */
 5996 tgl                       809 LBC           0 :         return n;
 5996 tgl                       810 ECB             :     }
                                811                 :     else
                                812                 :     {
                                813                 :         const char *s;
                                814                 : 
 5996 tgl                       815 GIC     2960165 :         for (s = p; n > 0; n--)
                                816         2954708 :             s += pg_mblen(s);
                                817                 : 
                                818            5457 :         return s - p;
                                819                 :     }
                                820                 : }
                                821                 : 
                                822                 : /*
 9229 lockhart                  823 ECB             :  * text_substr()
                                824                 :  * Return a substring starting at the specified position.
                                825                 :  * - thomas 1997-12-31
                                826                 :  *
                                827                 :  * Input:
 9173 bruce                     828 EUB             :  *  - string
                                829                 :  *  - starting position (is one-based)
                                830                 :  *  - string length
                                831                 :  *
                                832                 :  * If the starting position is zero or less, then return from the start of the string
                                833                 :  *  adjusting the length to be consistent with the "negative start" per SQL.
 9229 lockhart                  834 ECB             :  * If the length is less than zero, return the remaining string.
                                835                 :  *
                                836                 :  * Added multibyte support.
 9113 scrappy                   837                 :  * - Tatsuo Ishii 1998-4-21
                                838                 :  * Changed behavior if starting position is less than one to conform to SQL behavior.
                                839                 :  * Formerly returned the entire string; now returns a portion.
                                840                 :  * - Thomas Lockhart 1998-12-10
                                841                 :  * Now uses faster TOAST-slicing interface
                                842                 :  * - John Gray 2002-02-22
                                843                 :  * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
                                844                 :  * behaviors conflicting with SQL to meet SQL (if E = S + L < S throw
                                845                 :  * error; if E < 1, return '', not entire string). Fixed MB related bug when
                                846                 :  * S > LC and < LC + 4 sometimes garbage characters are returned.
                                847                 :  * - Joe Conway 2002-08-10
                                848                 :  */
                                849                 : Datum
 8335 tgl                       850 GIC      374965 : text_substr(PG_FUNCTION_ARGS)
                                851                 : {
 7535 bruce                     852          374965 :     PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
                                853                 :                                     PG_GETARG_INT32(1),
                                854                 :                                     PG_GETARG_INT32(2),
                                855                 :                                     false));
                                856                 : }
                                857                 : 
                                858                 : /*
                                859                 :  * text_substr_no_len -
                                860                 :  *    Wrapper to avoid opr_sanity failure due to
                                861                 :  *    one function accepting a different number of args.
                                862                 :  */
                                863                 : Datum
                                864              24 : text_substr_no_len(PG_FUNCTION_ARGS)
                                865                 : {
                                866              24 :     PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
                                867                 :                                     PG_GETARG_INT32(1),
                                868                 :                                     -1, true));
 7535 bruce                     869 ECB             : }
                                870                 : 
                                871                 : /*
                                872                 :  * text_substring -
                                873                 :  *  Does the real work for text_substr() and text_substr_no_len()
                                874                 :  *
                                875                 :  *  This is broken out so it can be called directly by other string processing
                                876                 :  *  functions.  Note that the argument is passed as a Datum, to indicate that
                                877                 :  *  it may still be in compressed/toasted form.  We can avoid detoasting all
                                878                 :  *  of it in some cases.
                                879                 :  *
                                880                 :  *  The result is always a freshly palloc'd datum.
                                881                 :  */
                                882                 : static text *
 7535 bruce                     883 CBC      394913 : text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
                                884                 : {
                                885          394913 :     int32       eml = pg_database_encoding_max_length();
 7522 bruce                     886 GIC      394913 :     int32       S = start;      /* start position */
                                887                 :     int32       S1;             /* adjusted start position */
                                888                 :     int32       L1;             /* adjusted substring length */
                                889                 :     int32       E;              /* end position */
                                890                 : 
                                891                 :     /*
                                892                 :      * SQL99 says S can be zero or negative, but we still must fetch from the
                                893                 :      * start of the string.
                                894                 :      */
  825 tgl                       895          394913 :     S1 = Max(S, 1);
                                896                 : 
                                897                 :     /* life is easy if the encoding max length is 1 */
 7535 bruce                     898          394913 :     if (eml == 1)
                                899                 :     {
 2118 tgl                       900               6 :         if (length_not_specified)   /* special case - get length to end of
                                901                 :                                      * string */
 7535 bruce                     902 LBC           0 :             L1 = -1;
  825 tgl                       903 GIC           6 :         else if (length < 0)
  825 tgl                       904 ECB             :         {
                                905                 :             /* SQL99 says to throw an error for E < S, i.e., negative length */
  825 tgl                       906 UIC           0 :             ereport(ERROR,
                                907                 :                     (errcode(ERRCODE_SUBSTRING_ERROR),
                                908                 :                      errmsg("negative substring length not allowed")));
                                909                 :             L1 = -1;            /* silence stupider compilers */
                                910                 :         }
  825 tgl                       911 GIC           6 :         else if (pg_add_s32_overflow(S, length, &E))
                                912                 :         {
                                913                 :             /*
  825 tgl                       914 ECB             :              * L could be large enough for S + L to overflow, in which case
                                915                 :              * the substring must run to end of string.
                                916                 :              */
  825 tgl                       917 LBC           0 :             L1 = -1;
                                918                 :         }
  825 tgl                       919 ECB             :         else
                                920                 :         {
 7522 bruce                     921 EUB             :             /*
 6385 bruce                     922 ECB             :              * A zero or negative value for the end position can happen if the
                                923                 :              * start was negative or one. SQL99 says to return a zero-length
                                924                 :              * string.
 7535 bruce                     925 EUB             :              */
 7535 bruce                     926 GIC           6 :             if (E < 1)
 5493 tgl                       927 UIC           0 :                 return cstring_to_text("");
                                928                 : 
 7535 bruce                     929 GIC           6 :             L1 = E - S1;
 7535 bruce                     930 ECB             :         }
                                931                 : 
                                932                 :         /*
                                933                 :          * If the start position is past the end of the string, SQL99 says to
                                934                 :          * return a zero-length string -- DatumGetTextPSlice() will do that
                                935                 :          * for us.  We need only convert S1 to zero-based starting position.
 7535 bruce                     936 EUB             :          */
 7535 bruce                     937 GIC           6 :         return DatumGetTextPSlice(str, S1 - 1, L1);
                                938                 :     }
                                939          394907 :     else if (eml > 1)
                                940                 :     {
                                941                 :         /*
                                942                 :          * When encoding max length is > 1, we can't get LC without
                                943                 :          * detoasting, so we'll grab a conservatively large slice now and go
                                944                 :          * back later to do the right thing
 7535 bruce                     945 ECB             :          */
 7535 bruce                     946 EUB             :         int32       slice_start;
                                947                 :         int32       slice_size;
 7535 bruce                     948 ECB             :         int32       slice_strlen;
                                949                 :         text       *slice;
                                950                 :         int32       E1;
                                951                 :         int32       i;
                                952                 :         char       *p;
                                953                 :         char       *s;
                                954                 :         text       *ret;
                                955                 : 
                                956                 :         /*
                                957                 :          * We need to start at position zero because there is no way to know
 6385                           958                 :          * in advance which byte offset corresponds to the supplied start
                                959                 :          * position.
                                960                 :          */
 7535 bruce                     961 GIC      394907 :         slice_start = 0;
                                962                 : 
 2118 tgl                       963          394907 :         if (length_not_specified)   /* special case - get length to end of
                                964                 :                                      * string */
 7535 bruce                     965              44 :             slice_size = L1 = -1;
  825 tgl                       966          394863 :         else if (length < 0)
                                967                 :         {
                                968                 :             /* SQL99 says to throw an error for E < S, i.e., negative length */
                                969               6 :             ereport(ERROR,
                                970                 :                     (errcode(ERRCODE_SUBSTRING_ERROR),
                                971                 :                      errmsg("negative substring length not allowed")));
                                972                 :             slice_size = L1 = -1;   /* silence stupider compilers */
                                973                 :         }
                                974          394857 :         else if (pg_add_s32_overflow(S, length, &E))
                                975                 :         {
                                976                 :             /*
                                977                 :              * L could be large enough for S + L to overflow, in which case
                                978                 :              * the substring must run to end of string.
                                979                 :              */
  825 tgl                       980 CBC           3 :             slice_size = L1 = -1;
                                981                 :         }
  825 tgl                       982 ECB             :         else
                                983                 :         {
 7522 bruce                     984                 :             /*
 6385                           985                 :              * A zero or negative value for the end position can happen if the
                                986                 :              * start was negative or one. SQL99 says to return a zero-length
                                987                 :              * string.
 7535                           988                 :              */
 7535 bruce                     989 GIC      394854 :             if (E < 1)
 5493 tgl                       990 UIC           0 :                 return cstring_to_text("");
                                991                 : 
                                992                 :             /*
 7522 bruce                     993 ECB             :              * if E is past the end of the string, the tuple toaster will
                                994                 :              * truncate the length for us
                                995                 :              */
 7535 bruce                     996 GIC      394854 :             L1 = E - S1;
                                997                 : 
                                998                 :             /*
 6385 bruce                     999 ECB             :              * Total slice size in bytes can't be any longer than the start
                               1000                 :              * position plus substring length times the encoding max length.
                               1001                 :              * If that overflows, we can just use -1.
                               1002                 :              */
  825 tgl                      1003 GIC      394854 :             if (pg_mul_s32_overflow(E, eml, &slice_size))
                               1004               3 :                 slice_size = -1;
                               1005                 :         }
                               1006                 : 
                               1007                 :         /*
 5624 bruce                    1008 ECB             :          * If we're working with an untoasted source, no need to do an extra
 5624 bruce                    1009 EUB             :          * copying step.
                               1010                 :          */
 5327 tgl                      1011 GIC      394901 :         if (VARATT_IS_COMPRESSED(DatumGetPointer(str)) ||
 5475                          1012          394874 :             VARATT_IS_EXTERNAL(DatumGetPointer(str)))
 5996                          1013             162 :             slice = DatumGetTextPSlice(str, slice_start, slice_size);
                               1014                 :         else
 5996 tgl                      1015 CBC      394739 :             slice = (text *) DatumGetPointer(str);
                               1016                 : 
                               1017                 :         /* see if we got back an empty string */
 5678 tgl                      1018 GIC      394901 :         if (VARSIZE_ANY_EXHDR(slice) == 0)
                               1019                 :         {
 5996 tgl                      1020 UIC           0 :             if (slice != (text *) DatumGetPointer(str))
                               1021               0 :                 pfree(slice);
 5493 tgl                      1022 LBC           0 :             return cstring_to_text("");
 5996 tgl                      1023 ECB             :         }
                               1024                 : 
                               1025                 :         /* Now we can get the actual length of the slice in MB characters */
 5678 tgl                      1026 GIC      394901 :         slice_strlen = pg_mbstrlen_with_len(VARDATA_ANY(slice),
                               1027          394901 :                                             VARSIZE_ANY_EXHDR(slice));
                               1028                 : 
                               1029                 :         /*
 6385 bruce                    1030 ECB             :          * Check that the start position wasn't > slice_strlen. If so, SQL99
                               1031                 :          * says to return a zero-length string.
 7535                          1032                 :          */
 7535 bruce                    1033 GIC      394901 :         if (S1 > slice_strlen)
 5996 tgl                      1034 ECB             :         {
 5996 tgl                      1035 GIC          11 :             if (slice != (text *) DatumGetPointer(str))
 5996 tgl                      1036 UIC           0 :                 pfree(slice);
 5493 tgl                      1037 CBC          11 :             return cstring_to_text("");
                               1038                 :         }
 9229 lockhart                 1039 EUB             : 
 7535 bruce                    1040                 :         /*
 6385                          1041                 :          * Adjust L1 and E1 now that we know the slice string length. Again
                               1042                 :          * remember that S1 is one based, and slice_start is zero based.
                               1043                 :          */
 7535 bruce                    1044 GIC      394890 :         if (L1 > -1)
 7522 bruce                    1045 CBC      394854 :             E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
 7535 bruce                    1046 ECB             :         else
 7535 bruce                    1047 GIC          36 :             E1 = slice_start + 1 + slice_strlen;
                               1048                 : 
                               1049                 :         /*
                               1050                 :          * Find the start position in the slice; remember S1 is not zero based
                               1051                 :          */
 5678 tgl                      1052 CBC      394890 :         p = VARDATA_ANY(slice);
 7535 bruce                    1053 GIC     2805764 :         for (i = 0; i < S1 - 1; i++)
 7535 bruce                    1054 CBC     2410874 :             p += pg_mblen(p);
 7535 bruce                    1055 EUB             : 
 7535 bruce                    1056 ECB             :         /* hang onto a pointer to our start position */
 7535 bruce                    1057 GIC      394890 :         s = p;
                               1058                 : 
                               1059                 :         /*
                               1060                 :          * Count the actual bytes used by the substring of the requested
                               1061                 :          * length.
                               1062                 :          */
 7535 bruce                    1063 CBC     4855113 :         for (i = S1; i < E1; i++)
                               1064         4460223 :             p += pg_mblen(p);
                               1065                 : 
                               1066          394890 :         ret = (text *) palloc(VARHDRSZ + (p - s));
 5885 tgl                      1067 GIC      394890 :         SET_VARSIZE(ret, VARHDRSZ + (p - s));
 7535 bruce                    1068          394890 :         memcpy(VARDATA(ret), s, (p - s));
                               1069                 : 
 5996 tgl                      1070          394890 :         if (slice != (text *) DatumGetPointer(str))
 5996 tgl                      1071 CBC         162 :             pfree(slice);
 5996 tgl                      1072 ECB             : 
 7535 bruce                    1073 CBC      394890 :         return ret;
                               1074                 :     }
                               1075                 :     else
 7196 tgl                      1076 LBC           0 :         elog(ERROR, "invalid backend encoding: encoding max length < 1");
                               1077                 : 
                               1078                 :     /* not reached: suppress compiler warning */
                               1079                 :     return NULL;
                               1080                 : }
                               1081                 : 
 4822 tgl                      1082 ECB             : /*
                               1083                 :  * textoverlay
                               1084                 :  *  Replace specified substring of first string with second
                               1085                 :  *
                               1086                 :  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
                               1087                 :  * This code is a direct implementation of what the standard says.
                               1088                 :  */
                               1089                 : Datum
 4822 tgl                      1090 CBC          14 : textoverlay(PG_FUNCTION_ARGS)
                               1091                 : {
                               1092              14 :     text       *t1 = PG_GETARG_TEXT_PP(0);
 4822 tgl                      1093 GIC          14 :     text       *t2 = PG_GETARG_TEXT_PP(1);
 2118                          1094              14 :     int         sp = PG_GETARG_INT32(2);    /* substring start position */
 2118 tgl                      1095 GBC          14 :     int         sl = PG_GETARG_INT32(3);    /* substring length */
                               1096                 : 
 4822 tgl                      1097 GIC          14 :     PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
                               1098                 : }
                               1099                 : 
                               1100                 : Datum
                               1101               6 : textoverlay_no_len(PG_FUNCTION_ARGS)
                               1102                 : {
                               1103               6 :     text       *t1 = PG_GETARG_TEXT_PP(0);
                               1104               6 :     text       *t2 = PG_GETARG_TEXT_PP(1);
 2118                          1105               6 :     int         sp = PG_GETARG_INT32(2);    /* substring start position */
                               1106                 :     int         sl;
                               1107                 : 
                               1108               6 :     sl = text_length(PointerGetDatum(t2));  /* defaults to length(t2) */
 4822 tgl                      1109 CBC           6 :     PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
                               1110                 : }
 4822 tgl                      1111 ECB             : 
                               1112                 : static text *
 4822 tgl                      1113 CBC          20 : text_overlay(text *t1, text *t2, int sp, int sl)
 4822 tgl                      1114 ECB             : {
                                                          /*
                                                           * text_overlay - build the result of overlaying t2 onto t1.
                                                           *
                                                           * The result is s1 + t2 + s2 (joined with text_catenate), where s1 and
                                                           * s2 are both cut out of t1 by text_substring: s1 is requested with
                                                           * start 1 and length sp - 1, and s2 with start sp + sl.
                                                           * NOTE(review): the -1/true arguments of the second text_substring call
                                                           * presumably mean that no length is given and the rest of t1 is taken;
                                                           * confirm against text_substring.
                                                           *
                                                           * Reports ERRCODE_SUBSTRING_ERROR when sp <= 0, and
                                                           * ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE when pg_add_s32_overflow reports
                                                           * that sp + sl does not fit.
                                                           */
                               1115                 :     text       *result;
                               1116                 :     text       *s1;
                               1117                 :     text       *s2;
                               1118                 :     int         sp_pl_sl;
                               1119                 : 
                               1120                 :     /*
                               1121                 :      * Check for possible integer-overflow cases.  For sp <= 0 (the test
                                                           * below rejects zero as well as negative values), throw a
 4790 bruce                    1122                 :      * "substring length" error because that's what should be expected
                               1123                 :      * according to the spec's definition of OVERLAY().
 4822 tgl                      1124                 :      */
 4822 tgl                      1125 GIC          20 :     if (sp <= 0)
 4822 tgl                      1126 UIC           0 :         ereport(ERROR,
 4822 tgl                      1127 ECB             :                 (errcode(ERRCODE_SUBSTRING_ERROR),
                               1128                 :                  errmsg("negative substring length not allowed")));
 1944 andres                   1129 GIC          20 :     if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
 4822 tgl                      1130 UIC           0 :         ereport(ERROR,
                               1131                 :                 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
 4822 tgl                      1132 ECB             :                  errmsg("integer out of range")));
                               1133                 : 
                                                          /* s1: the part of t1 before position sp; s2: the part from sp + sl on */
 4790 bruce                    1134 GIC          20 :     s1 = text_substring(PointerGetDatum(t1), 1, sp - 1, false);
 4822 tgl                      1135              20 :     s2 = text_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
                               1136              20 :     result = text_catenate(s1, t2);
                               1137              20 :     result = text_catenate(result, s2);
                               1138                 : 
                               1139              20 :     return result;
                               1140                 : }
                               1141                 : 
                               1142                 : /*
                               1143                 :  * textpos -
 9345 bruce                    1144 ECB             :  *    Return the position of the specified substring.
 3641 peter_e                  1145 EUB             :  *    Implements the SQL POSITION() function.
                               1146                 :  *    Ref: A Guide To The SQL Standard, Date & Darwen, 1997
                               1147                 :  * - thomas 1997-07-27
                                                       *
                                                       * Argument 0 is the string to be searched and argument 1 is the
                                                       * substring to look for.  The search itself is delegated to
                                                       * text_position(), using the collation of this call, and its int
                                                       * result is returned as an int32.
 9385 lockhart                 1148 ECB             :  */
 8312 tgl                      1149 EUB             : Datum
 8312 tgl                      1150 GIC          53 : textpos(PG_FUNCTION_ARGS)
                               1151                 : {
 5678                          1152              53 :     text       *str = PG_GETARG_TEXT_PP(0);
 5678 tgl                      1153 CBC          53 :     text       *search_str = PG_GETARG_TEXT_PP(1);
 7008 tgl                      1154 ECB             : 
 1479 peter                    1155 CBC          53 :     PG_RETURN_INT32((int32) text_position(str, search_str, PG_GET_COLLATION()));
 7535 bruce                    1156 ECB             : }
                               1157                 : 
                               1158                 : /*
                               1159                 :  * text_position -
                               1160                 :  *  Does the real work for textpos()
                               1161                 :  *
                               1162                 :  * Inputs:
                               1163                 :  *      t1 - string to be searched
                               1164                 :  *      t2 - pattern to match within t1
                                                       *      collid - collation OID, handed on to text_position_setup()
                               1165                 :  * Result:
                               1166                 :  *      Character index of the first matched char, starting from 1,
                               1167                 :  *      or 0 if no match.
                               1168                 :  *
                               1169                 :  *  This is broken out so it can be called directly by other string processing
                               1170                 :  *  functions.
                                                       *
                                                       *  Note: the two shortcut returns below happen before
                                                       *  text_position_setup() is called, so the collation checks made there
                                                       *  are not reached on those paths.
                               1171                 :  */
 6028 tgl                      1172                 : static int
 1479 peter                    1173 GIC          53 : text_position(text *t1, text *t2, Oid collid)
 7535 bruce                    1174 ECB             : {
                               1175                 :     TextPositionState state;
                               1176                 :     int         result;
                               1177                 : 
                               1178                 :     /* Empty needle always matches at position 1 */
 1259 tgl                      1179 GIC          53 :     if (VARSIZE_ANY_EXHDR(t2) < 1)
                               1180               6 :         return 1;
                               1181                 : 
                               1182                 :     /* Otherwise, can't match if haystack is shorter than needle */
                               1183              47 :     if (VARSIZE_ANY_EXHDR(t1) < VARSIZE_ANY_EXHDR(t2))
 1535 heikki.linnakangas       1184              11 :         return 0;
                               1185                 : 
                                                          /* Only the first match is wanted, so text_position_next runs once */
 1479 peter                    1186              36 :     text_position_setup(t1, t2, collid, &state);
 1535 heikki.linnakangas       1187              36 :     if (!text_position_next(&state))
                               1188              12 :         result = 0;
                               1189                 :     else
                               1190              24 :         result = text_position_get_match_pos(&state);
 6028 tgl                      1191              36 :     text_position_cleanup(&state);
 6028 tgl                      1192 CBC          36 :     return result;
                               1193                 : }
                               1194                 : 
                               1195                 : 
                               1196                 : /*
                               1197                 :  * text_position_setup, text_position_next, text_position_cleanup -
 6028 tgl                      1198 ECB             :  *  Component steps of text_position()
                               1199                 :  *
                               1200                 :  * These are broken out so that a string can be efficiently searched for
                               1201                 :  * multiple occurrences of the same pattern.  text_position_next may be
 1535 heikki.linnakangas       1202                 :  * called multiple times, and it advances to the next match on each call.
                               1203                 :  * text_position_get_match_ptr() and text_position_get_match_pos() return
                               1204                 :  * a pointer or 1-based character position of the last match, respectively.
                               1205                 :  *
                               1206                 :  * The "state" variable is normally just a local variable in the caller.
                               1207                 :  *
                               1208                 :  * NOTE: text_position_next skips over the matched portion.  For example,
                               1209                 :  * searching for "xx" in "xxx" returns only one match, not two.
                                                       *
                                                       * text_position_setup takes the haystack t1, the needle t2 and the
                                                       * collation OID collid, and fills in *state.  It validates collid with
                                                       * check_collation_set(), reports ERRCODE_FEATURE_NOT_SUPPORTED for a
                                                       * nondeterministic collation, and Asserts that both strings have a
                                                       * length greater than zero.  The state keeps pointers into t1 and t2
                                                       * (str1/str2) rather than copies of their data.
 6028 tgl                      1210                 :  */
 9345 bruce                    1211                 : 
                               1212                 : static void
 1479 peter                    1213 GIC        1405 : text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state)
                               1214                 : {
 5678 tgl                      1215            1405 :     int         len1 = VARSIZE_ANY_EXHDR(t1);
                               1216            1405 :     int         len2 = VARSIZE_ANY_EXHDR(t2);
                                                          /* mylocale stays 0 when lc_collate_is_c(collid) is true */
 1418                          1217            1405 :     pg_locale_t mylocale = 0;
                               1218                 : 
 1479 peter                    1219            1405 :     check_collation_set(collid);
                               1220                 : 
  444                          1221            1405 :     if (!lc_collate_is_c(collid))
 1479                          1222             140 :         mylocale = pg_newlocale_from_collation(collid);
                               1223                 : 
   45 jdavis                   1224 GNC        1405 :     if (!pg_locale_deterministic(mylocale))
 1479 peter                    1225 GIC           6 :         ereport(ERROR,
                               1226                 :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                               1227                 :                  errmsg("nondeterministic collations are not supported for substring searches")));
                               1228                 : 
 1535 heikki.linnakangas       1229            1399 :     Assert(len1 > 0);
                               1230            1399 :     Assert(len2 > 0);
                               1231                 : 
 1535 heikki.linnakangas       1232 ECB             :     /*
                               1233                 :      * Even with a multi-byte encoding, we perform the search using the raw
                               1234                 :      * byte sequence, ignoring multibyte issues.  For UTF-8, that works fine,
                               1235                 :      * because in UTF-8 the byte sequence of one character cannot contain
                               1236                 :      * another character.  For other multi-byte encodings, we do the search
                               1237                 :      * initially as a simple byte search, ignoring multibyte issues, but
                               1238                 :      * verify afterwards that the match we found is at a character boundary,
                               1239                 :      * and continue the search if it was a false match.
                                                           *
                                                           * is_multibyte_char_in_char is the flag that text_position_next tests
                                                           * to decide whether that character-boundary verification is needed.
                               1240                 :      */
 7008 tgl                      1241 CBC        1399 :     if (pg_database_encoding_max_length() == 1)
 1535 heikki.linnakangas       1242 GIC          18 :         state->is_multibyte_char_in_char = false;
 1535 heikki.linnakangas       1243 CBC        1381 :     else if (GetDatabaseEncoding() == PG_UTF8)
                               1244            1381 :         state->is_multibyte_char_in_char = false;
                               1245                 :     else
 1535 heikki.linnakangas       1246 UIC           0 :         state->is_multibyte_char_in_char = true;
                               1247                 : 
                                                          /* Same initial search position that text_position_reset restores */
 1535 heikki.linnakangas       1248 CBC        1399 :     state->str1 = VARDATA_ANY(t1);
                               1249            1399 :     state->str2 = VARDATA_ANY(t2);
 1535 heikki.linnakangas       1250 GIC        1399 :     state->len1 = len1;
                               1251            1399 :     state->len2 = len2;
                               1252            1399 :     state->last_match = NULL;
                               1253            1399 :     state->refpoint = state->str1;
                               1254            1399 :     state->refpos = 0;
                               1255                 : 
                               1256                 :     /*
                               1257                 :      * Prepare the skip table for Boyer-Moore-Horspool searching.  In these
                               1258                 :      * notes we use the terminology that the "haystack" is the string to be
                               1259                 :      * searched (t1) and the "needle" is the pattern being sought (t2).
 5327 tgl                      1260 ECB             :      *
                               1261                 :      * If the needle is empty or bigger than the haystack then there is no
 3260 bruce                    1262                 :      * point in wasting cycles initializing the table.  We also choose not to
 5050                          1263                 :      * use B-M-H for needles of length 1, since the skip table can't possibly
                               1264                 :      * save anything in that case.
                                                           *
                                                           * (An empty needle is already excluded by the Assert above.)  When the
                                                           * test below is false, neither state->skiptablemask nor
                                                           * state->skiptable is written by this function.
                                                           * NOTE(review): text_position_next_internal copies
                                                           * state->skiptablemask into a local on every call, including the
                                                           * one-byte-needle case where it is never used; confirm that is
                                                           * acceptable when this branch was skipped.
 5327 tgl                      1265 EUB             :      */
 5327 tgl                      1266 GIC        1399 :     if (len1 >= len2 && len2 > 1)
 5327 tgl                      1267 ECB             :     {
 5050 bruce                    1268 CBC        1295 :         int         searchlength = len1 - len2;
 5050 bruce                    1269 ECB             :         int         skiptablemask;
                               1270                 :         int         last;
                               1271                 :         int         i;
 1535 heikki.linnakangas       1272 CBC        1295 :         const char *str2 = state->str2;
 5327 tgl                      1273 ECB             : 
                               1274                 :         /*
                               1275                 :          * First we must determine how much of the skip table to use.  The
                               1276                 :          * declaration of TextPositionState allows up to 256 elements, but for
                               1277                 :          * short search problems we don't really want to have to initialize so
                               1278                 :          * many elements --- it would take too long in comparison to the
                               1279                 :          * actual search time.  So we choose a useful skip table size based on
                               1280                 :          * the haystack length minus the needle length.  The closer the needle
                               1281                 :          * length is to the haystack length the less useful skipping becomes.
                               1282                 :          *
                               1283                 :          * Note: since we use bit-masking to select table elements, the skip
                               1284                 :          * table size MUST be a power of 2, and so the mask must be 2^N-1.
                                                               * The masks chosen below are 3, 7, 15, 31, 63, 127 and 255, i.e.
                                                               * table sizes of 4 up to 256 entries.
                               1285                 :          */
 5327 tgl                      1286 GIC        1295 :         if (searchlength < 16)
 5327 tgl                      1287 CBC          27 :             skiptablemask = 3;
 5327 tgl                      1288 GIC        1268 :         else if (searchlength < 64)
                               1289               8 :             skiptablemask = 7;
                               1290            1260 :         else if (searchlength < 128)
 5327 tgl                      1291 CBC           7 :             skiptablemask = 15;
 5327 tgl                      1292 GIC        1253 :         else if (searchlength < 512)
                               1293              95 :             skiptablemask = 31;
                               1294            1158 :         else if (searchlength < 2048)
                               1295            1096 :             skiptablemask = 63;
                               1296              62 :         else if (searchlength < 4096)
                               1297              28 :             skiptablemask = 127;
                               1298                 :         else
                               1299              34 :             skiptablemask = 255;
                               1300            1295 :         state->skiptablemask = skiptablemask;
                               1301                 : 
                               1302                 :         /*
                               1303                 :          * Initialize the skip table.  We set all elements to the needle
                               1304                 :          * length, since this is the correct skip distance for any character
 5327 tgl                      1305 ECB             :          * not found in the needle.
                               1306                 :          */
 5327 tgl                      1307 CBC       87051 :         for (i = 0; i <= skiptablemask; i++)
                               1308           85756 :             state->skiptable[i] = len2;
 5327 tgl                      1309 ECB             : 
                               1310                 :         /*
 3260 bruce                    1311                 :          * Now examine the needle.  For each character except the last one,
 5327 tgl                      1312                 :          * set the corresponding table element to the appropriate skip
                               1313                 :          * distance.  Note that when two characters share the same skip table
 5050 bruce                    1314                 :          * entry, the one later in the needle must determine the skip
                               1315                 :          * distance.
                                                               *
                                                               * Walking the needle front to back gives exactly that, because a
                                                               * later byte overwrites the entry stored by an earlier one.  The
                                                               * byte is cast to unsigned char before masking to form the index.
 5327 tgl                      1316                 :          */
 5327 tgl                      1317 GIC        1295 :         last = len2 - 1;
 5327 tgl                      1318 ECB             : 
 1535 heikki.linnakangas       1319 CBC       16263 :         for (i = 0; i < last; i++)
 1535 heikki.linnakangas       1320 GIC       14968 :             state->skiptable[(unsigned char) str2[i] & skiptablemask] = last - i;
                               1321                 :     }
 6028 tgl                      1322            1399 : }
                               1323                 : 
                               1324                 : /*
                               1325                 :  * Advance to the next match, starting from the end of the previous match
 1535 heikki.linnakangas       1326 ECB             :  * (or the beginning of the string, on first call).  Returns true if a match
                               1327                 :  * is found.
                               1328                 :  *
                               1329                 :  * Note that this refuses to match an empty-string needle.  Most callers
                               1330                 :  * will have handled that case specially and we'll never see it here.
                                                       *
                                                       * On success the start of the match is stored in state->last_match; on
                                                       * failure last_match is left as it was.  state->refpoint and
                                                       * state->refpos are advanced here only when
                                                       * state->is_multibyte_char_in_char is set.
                               1331                 :  */
                               1332                 : static bool
 1535 heikki.linnakangas       1333 GIC        4789 : text_position_next(TextPositionState *state)
                               1334                 : {
 5327 tgl                      1335            4789 :     int         needle_len = state->len2;
 1535 heikki.linnakangas       1336 ECB             :     char       *start_ptr;
                               1337                 :     char       *matchptr;
 6028 tgl                      1338                 : 
 5327 tgl                      1339 CBC        4789 :     if (needle_len <= 0)
 1535 heikki.linnakangas       1340 UIC           0 :         return false;           /* result for empty pattern */
 6028 tgl                      1341 ECB             : 
                               1342                 :     /* Start from the point right after the previous match. */
 1535 heikki.linnakangas       1343 GIC        4789 :     if (state->last_match)
                               1344            3384 :         start_ptr = state->last_match + needle_len;
                               1345                 :     else
                               1346            1405 :         start_ptr = state->str1;
                               1347                 : 
                                                      /* Re-entered from below with a later start_ptr after a false match */
                               1348            4789 : retry:
                               1349            4789 :     matchptr = text_position_next_internal(start_ptr, state);
                               1350                 : 
                               1351            4789 :     if (!matchptr)
 1535 heikki.linnakangas       1352 CBC        1369 :         return false;
                               1353                 : 
 1535 heikki.linnakangas       1354 ECB             :     /*
                               1355                 :      * Found a match for the byte sequence.  If this is a multibyte encoding,
                               1356                 :      * where one character's byte sequence can appear inside a longer
                               1357                 :      * multi-byte character, we need to verify that the match was at a
                               1358                 :      * character boundary, not in the middle of a multi-byte character.
 1535 heikki.linnakangas       1359 EUB             :      */
 1535 heikki.linnakangas       1360 GIC        3420 :     if (state->is_multibyte_char_in_char)
                               1361                 :     {
 1535 heikki.linnakangas       1362 ECB             :         /* Walk one character at a time, until we reach the match. */
 7008 tgl                      1363                 : 
                               1364                 :         /* the search should never move backwards. */
 1535 heikki.linnakangas       1365 LBC           0 :         Assert(state->refpoint <= matchptr);
                               1366                 : 
                               1367               0 :         while (state->refpoint < matchptr)
 5327 tgl                      1368 ECB             :         {
                               1369                 :             /* step to next character. */
 1535 heikki.linnakangas       1370 LBC           0 :             state->refpoint += pg_mblen(state->refpoint);
                               1371               0 :             state->refpos++;
                               1372                 : 
                               1373                 :             /*
                               1374                 :              * If we stepped over the match's start position, then it was a
                               1375                 :              * false positive, where the byte sequence appeared in the middle
                               1376                 :              * of a multi-byte character.  Skip it, and continue the search at
                               1377                 :              * the next character boundary.
                               1378                 :              */
                               1379               0 :             if (state->refpoint > matchptr)
                               1380                 :             {
 1535 heikki.linnakangas       1381 UIC           0 :                 start_ptr = state->refpoint;
                               1382               0 :                 goto retry;
                               1383                 :             }
 5327 tgl                      1384 EUB             :         }
                               1385                 :     }
                               1386                 : 
                                                          /* Remember the match; the next call resumes right after it */
 1535 heikki.linnakangas       1387 GIC        3420 :     state->last_match = matchptr;
                               1388            3420 :     return true;
 1535 heikki.linnakangas       1389 EUB             : }
 5327 tgl                      1390                 : 
                               1391                 : /*
                               1392                 :  * Subroutine of text_position_next().  This searches for the raw byte
                               1393                 :  * sequence, ignoring any multi-byte encoding issues.  Returns the first
                               1394                 :  * match starting at 'start_ptr', or NULL if no match is found.
                                                       *
                                                       * start_ptr must lie within the haystack or exactly at its end (see
                                                       * the Assert below).  The result points at the first byte of the match
                                                       * inside the haystack.  A one-byte needle is found with a plain linear
                                                       * scan; longer needles use state->skiptable and state->skiptablemask.
                               1395                 :  */
                               1396                 : static char *
 1535 heikki.linnakangas       1397 GIC        4789 : text_position_next_internal(char *start_ptr, TextPositionState *state)
 1535 heikki.linnakangas       1398 EUB             : {
 1535 heikki.linnakangas       1399 GIC        4789 :     int         haystack_len = state->len1;
 1535 heikki.linnakangas       1400 GBC        4789 :     int         needle_len = state->len2;
                               1401            4789 :     int         skiptablemask = state->skiptablemask;
 1535 heikki.linnakangas       1402 GIC        4789 :     const char *haystack = state->str1;
                               1403            4789 :     const char *needle = state->str2;
                               1404            4789 :     const char *haystack_end = &haystack[haystack_len];
                               1405                 :     const char *hptr;
 5050 bruce                    1406 ECB             : 
 1535 heikki.linnakangas       1407 CBC        4789 :     Assert(start_ptr >= haystack && start_ptr <= haystack_end);
                               1408                 : 
 1535 heikki.linnakangas       1409 GIC        4789 :     if (needle_len == 1)
                               1410                 :     {
                               1411                 :         /* No point in using B-M-H for a one-character needle */
                               1412             377 :         char        nchar = *needle;
                               1413                 : 
                               1414             377 :         hptr = start_ptr;
                               1415            2877 :         while (hptr < haystack_end)
 1535 heikki.linnakangas       1416 ECB             :         {
 1535 heikki.linnakangas       1417 GIC        2794 :             if (*hptr == nchar)
 1535 heikki.linnakangas       1418 CBC         294 :                 return (char *) hptr;
                               1419            2500 :             hptr++;
 7535 bruce                    1420 ECB             :         }
                               1421                 :     }
 7008 tgl                      1422                 :     else
 7535 bruce                    1423                 :     {
 1535 heikki.linnakangas       1424 GIC        4412 :         const char *needle_last = &needle[needle_len - 1];
                               1425                 : 
 1535 heikki.linnakangas       1426 ECB             :         /*
                                                               * Start with hptr on the haystack byte that lines up with the
                                                               * last needle byte when the needle is placed at start_ptr, i.e.
                                                               * at start_ptr + needle_len - 1.
                                                               */
 1535 heikki.linnakangas       1427 GIC        4412 :         hptr = start_ptr + needle_len - 1;
 1535 heikki.linnakangas       1428 CBC      116857 :         while (hptr < haystack_end)
                               1429                 :         {
                               1430                 :             /* Match the needle scanning *backward* */
 1535 heikki.linnakangas       1431 ECB             :             const char *nptr;
                               1432                 :             const char *p;
 7008 tgl                      1433                 : 
 1535 heikki.linnakangas       1434 CBC      115571 :             nptr = needle_last;
 1535 heikki.linnakangas       1435 GIC      115571 :             p = hptr;
 1535 heikki.linnakangas       1436 CBC      160855 :             while (*nptr == *p)
 5327 tgl                      1437 ECB             :             {
 1535 heikki.linnakangas       1438                 :                 /* Matched it all?  If so, return pointer to start of match */
 1535 heikki.linnakangas       1439 GIC       48410 :                 if (nptr == needle)
                               1440            3126 :                     return (char *) p;
                               1441           45284 :                 nptr--, p--;
                               1442                 :             }
 1535 heikki.linnakangas       1443 ECB             : 
                               1444                 :             /*
                               1445                 :              * No match, so use the haystack char at hptr to decide how far to
                               1446                 :              * advance.  If the needle had any occurrence of that character
                               1447                 :              * (or more precisely, one sharing the same skiptable entry)
                               1448                 :              * before its last character, then we advance far enough to align
                               1449                 :              * the last such needle character with that haystack position.
                               1450                 :              * Otherwise we can advance by the whole needle length.
                               1451                 :              */
 1535 heikki.linnakangas       1452 GIC      112445 :             hptr += state->skiptable[(unsigned char) *hptr & skiptablemask];
 5327 tgl                      1453 ECB             :         }
 1535 heikki.linnakangas       1454                 :     }
 5327 tgl                      1455                 : 
 1535 heikki.linnakangas       1456 GIC        1369 :     return 0;                   /* not found; the 0 is the null pointer (NULL) */
                               1457                 : }
 5327 tgl                      1458 ECB             : 
 1535 heikki.linnakangas       1459                 : /*
                               1460                 :  * Return a pointer to the current match.
                               1461                 :  *
                               1462                 :  * The returned pointer points into the original haystack string.
                                                       *
                                                       * This simply returns state->last_match, which is NULL after
                                                       * text_position_setup() or text_position_reset() until
                                                       * text_position_next() has found a match.
                               1463                 :  */
                               1464                 : static char *
 1535 heikki.linnakangas       1465 GIC        3381 : text_position_get_match_ptr(TextPositionState *state)
                               1466                 : {
                               1467            3381 :     return state->last_match;
                               1468                 : }
                               1469                 : 
                               1470                 : /*
 1535 heikki.linnakangas       1471 ECB             :  * Return the offset of the current match.
                               1472                 :  *
                               1473                 :  * The offset is in characters, 1-based.
                                                       *
                                                       * Side effect: state->refpos is increased by the value that
                                                       * pg_mbstrlen_with_len() returns for the bytes between state->refpoint
                                                       * and state->last_match, and state->refpoint is then moved up to
                                                       * last_match.  A later call therefore only has to measure the bytes
                                                       * after the previous match.
                               1474                 :  */
                               1475                 : static int
 1535 heikki.linnakangas       1476 GIC          24 : text_position_get_match_pos(TextPositionState *state)
                               1477                 : {
                               1478                 :     /* Convert the byte position to char position. */
  478 john.naylor              1479              48 :     state->refpos += pg_mbstrlen_with_len(state->refpoint,
                               1480              24 :                                           state->last_match - state->refpoint);
                               1481              24 :     state->refpoint = state->last_match;
                                                          /* refpos counts from 0, the reported position counts from 1 */
                               1482              24 :     return state->refpos + 1;
                               1483                 : }
 9770 scrappy                  1484 ECB             : 
                               1485                 : /*
  877 tgl                      1486                 :  * Reset search state to the initial state installed by text_position_setup.
                               1487                 :  *
                               1488                 :  * The next call to text_position_next will search from the beginning
                               1489                 :  * of the string.
                                                       *
                                                       * Only last_match, refpoint and refpos are restored; the string
                                                       * pointers, lengths and skip table already in *state are left as they
                                                       * are.
                               1490                 :  */
                               1491                 : static void
  877 tgl                      1492 GIC           6 : text_position_reset(TextPositionState *state)
                               1493                 : {
                               1494               6 :     state->last_match = NULL;
  877 tgl                      1495 CBC           6 :     state->refpoint = state->str1;
  877 tgl                      1496 GIC           6 :     state->refpos = 0;
                               1497               6 : }
  877 tgl                      1498 ECB             : 
 6028                          1499                 : static void
 5624 bruce                    1500 CBC        1399 : text_position_cleanup(TextPositionState *state)
 6028 tgl                      1501 ECB             : {
                               1502                 :     /* no cleanup needed */
 6028 tgl                      1503 GIC        1399 : }
                               1504                 : 
                               1505                 : 
                               1506                 : static void
 1479 peter                    1507         8518884 : check_collation_set(Oid collid)
                               1508                 : {
                               1509         8518884 :     if (!OidIsValid(collid))
                               1510                 :     {
 1479 peter                    1511 ECB             :         /*
                               1512                 :          * This typically means that the parser could not resolve a conflict
                               1513                 :          * of implicit collations, so report it that way.
                               1514                 :          */
 1479 peter                    1515 CBC          24 :         ereport(ERROR,
 1479 peter                    1516 ECB             :                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
                               1517                 :                  errmsg("could not determine which collation to use for string comparison"),
                               1518                 :                  errhint("Use the COLLATE clause to set the collation explicitly.")));
                               1519                 :     }
 1479 peter                    1520 GIC     8518860 : }
                               1521                 : 
 9063 bruce                    1522 ECB             : /* varstr_cmp()
                               1523                 :  * Comparison function for text strings with given lengths.
                               1524                 :  * Includes locale support, but must copy strings to temporary memory
                               1525                 :  *  to allow null-termination for inputs to strcoll().
 5099 heikki.linnakangas       1526                 :  * Returns an integer less than, equal to, or greater than zero, indicating
                               1527                 :  * whether arg1 is less than, equal to, or greater than arg2.
 1296 tgl                      1528                 :  *
                               1529                 :  * Note: many functions that depend on this are marked leakproof; therefore,
                               1530                 :  * avoid reporting the actual contents of the input when throwing errors.
                               1531                 :  * All errors herein should be things that can't happen except on corrupt
                               1532                 :  * data, anyway; otherwise we will have trouble with indexing strings that
                               1533                 :  * would cause them.
 9496 scrappy                  1534                 :  */
                               1535                 : int
 1986 peter_e                  1536 GIC     4679892 : varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
                               1537                 : {
                               1538                 :     int         result;
 9345 bruce                    1539 ECB             : 
 1479 peter                    1540 GIC     4679892 :     check_collation_set(collid);
                               1541                 : 
                               1542                 :     /*
                               1543                 :      * Unfortunately, there is no strncoll(), so in the non-C locale case we
                               1544                 :      * have to do some memory copying.  This turns out to be significantly
                               1545                 :      * slower, so we optimize the case where LC_COLLATE is C.  We also try to
                               1546                 :      * optimize relatively-short strings by avoiding palloc/pfree overhead.
                               1547                 :      */
 4443 peter_e                  1548         4679877 :     if (lc_collate_is_c(collid))
                               1549                 :     {
 4492 rhaas                    1550         2394653 :         result = memcmp(arg1, arg2, Min(len1, len2));
 6435 tgl                      1551         2394653 :         if ((result == 0) && (len1 != len2))
                               1552           68787 :             result = (len1 < len2) ? -1 : 1;
                               1553                 :     }
                               1554                 :     else
 6435 tgl                      1555 ECB             :     {
                               1556                 :         pg_locale_t mylocale;
                               1557                 : 
  444 peter                    1558 GIC     2285224 :         mylocale = pg_newlocale_from_collation(collid);
                               1559                 : 
                               1560                 :         /*
                               1561                 :          * memcmp() can't tell us which of two unequal strings sorts first,
                               1562                 :          * but it's a cheap way to tell if they're equal.  Testing shows that
 3124 rhaas                    1563 ECB             :          * memcmp() followed by strcoll() is only trivially slower than
                               1564                 :          * strcoll() by itself, so we don't lose much if this doesn't work out
                               1565                 :          * very often, and if it does - for example, because there are many
                               1566                 :          * equal strings in the input - then we win big by avoiding expensive
                               1567                 :          * collation-aware comparisons.
                               1568                 :          */
 3124 rhaas                    1569 GIC     2285224 :         if (len1 == len2 && memcmp(arg1, arg2, len1) == 0)
                               1570          949522 :             return 0;
                               1571                 : 
   45 jdavis                   1572 GNC     1335702 :         result = pg_strncoll(arg1, len1, arg2, len2, mylocale);
                               1573                 : 
                               1574                 :         /* Break tie if necessary. */
                               1575         1335702 :         if (result == 0 && pg_locale_deterministic(mylocale))
                               1576                 :         {
   45 jdavis                   1577 UNC           0 :             result = memcmp(arg1, arg2, Min(len1, len2));
                               1578               0 :             if ((result == 0) && (len1 != len2))
                               1579               0 :                 result = (len1 < len2) ? -1 : 1;
                               1580                 :         }
                               1581                 :     }
                               1582                 : 
 8986 bruce                    1583 CBC     3730355 :     return result;
                               1584                 : }
                               1585                 : 
                               1586                 : /* text_cmp()
 8011 tgl                      1587 ECB             :  * Internal comparison function for text strings.
                               1588                 :  * Returns -1, 0 or 1
 9496 scrappy                  1589                 :  */
 8949 bruce                    1590                 : static int
 4443 peter_e                  1591 GIC     3848460 : text_cmp(text *arg1, text *arg2, Oid collid)
                               1592                 : {
 8986 bruce                    1593 ECB             :     char       *a1p,
                               1594                 :                *a2p;
                               1595                 :     int         len1,
                               1596                 :                 len2;
                               1597                 : 
 5847 tgl                      1598 CBC     3848460 :     a1p = VARDATA_ANY(arg1);
 5847 tgl                      1599 GIC     3848460 :     a2p = VARDATA_ANY(arg2);
                               1600                 : 
                               1601         3848460 :     len1 = VARSIZE_ANY_EXHDR(arg1);
 5847 tgl                      1602 CBC     3848460 :     len2 = VARSIZE_ANY_EXHDR(arg2);
                               1603                 : 
 4443 peter_e                  1604         3848460 :     return varstr_cmp(a1p, len1, a2p, len2, collid);
 8312 tgl                      1605 ECB             : }
                               1606                 : 
                               1607                 : /*
                               1608                 :  * Comparison functions for text strings.
                               1609                 :  *
 8306                          1610                 :  * Note: btree indexes need these routines not to leak memory; therefore,
                               1611                 :  * be careful to free working copies of toasted datums.  Most places don't
                               1612                 :  * need to be so careful.
 9063 bruce                    1613                 :  */
                               1614                 : 
                               1615                 : Datum
 8011 tgl                      1616 GIC     3610619 : texteq(PG_FUNCTION_ARGS)
 8011 tgl                      1617 ECB             : {
 1479 peter                    1618 GIC     3610619 :     Oid         collid = PG_GET_COLLATION();
  444 peter                    1619 CBC     3610619 :     bool        locale_is_c = false;
  332 tgl                      1620         3610619 :     pg_locale_t mylocale = 0;
                               1621                 :     bool        result;
                               1622                 : 
 1479 peter                    1623         3610619 :     check_collation_set(collid);
                               1624                 : 
  444                          1625         3610619 :     if (lc_collate_is_c(collid))
                               1626          273132 :         locale_is_c = true;
                               1627                 :     else
                               1628         3337487 :         mylocale = pg_newlocale_from_collation(collid);
                               1629                 : 
   45 jdavis                   1630 GNC     3610619 :     if (locale_is_c || pg_locale_deterministic(mylocale))
 1479 peter                    1631 GIC     3610377 :     {
 1479 peter                    1632 CBC     3610377 :         Datum       arg1 = PG_GETARG_DATUM(0);
 1479 peter                    1633 GIC     3610377 :         Datum       arg2 = PG_GETARG_DATUM(1);
 1479 peter                    1634 ECB             :         Size        len1,
                               1635                 :                     len2;
                               1636                 : 
                               1637                 :         /*
 1418 tgl                      1638                 :          * Since we only care about equality or not-equality, we can avoid all
                               1639                 :          * the expense of strcoll() here, and just do bitwise comparison.  In
                               1640                 :          * fact, we don't even have to do a bitwise comparison if we can show
                               1641                 :          * the lengths of the strings are unequal; which might save us from
                               1642                 :          * having to detoast one or both values.
 1479 peter                    1643                 :          */
 1479 peter                    1644 GIC     3610377 :         len1 = toast_raw_datum_size(arg1);
                               1645         3610377 :         len2 = toast_raw_datum_size(arg2);
                               1646         3610377 :         if (len1 != len2)
 1479 peter                    1647 CBC     1435697 :             result = false;
                               1648                 :         else
 1479 peter                    1649 ECB             :         {
 1479 peter                    1650 CBC     2174680 :             text       *targ1 = DatumGetTextPP(arg1);
                               1651         2174680 :             text       *targ2 = DatumGetTextPP(arg2);
 1479 peter                    1652 ECB             : 
 1479 peter                    1653 GIC     2174680 :             result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
                               1654                 :                              len1 - VARHDRSZ) == 0);
                               1655                 : 
                               1656         2174680 :             PG_FREE_IF_COPY(targ1, 0);
 1479 peter                    1657 CBC     2174680 :             PG_FREE_IF_COPY(targ2, 1);
                               1658                 :         }
 1479 peter                    1659 ECB             :     }
 8011 tgl                      1660                 :     else
                               1661                 :     {
 1479 peter                    1662 CBC         242 :         text       *arg1 = PG_GETARG_TEXT_PP(0);
 1479 peter                    1663 GBC         242 :         text       *arg2 = PG_GETARG_TEXT_PP(1);
                               1664                 : 
 1479 peter                    1665 GIC         242 :         result = (text_cmp(arg1, arg2, collid) == 0);
                               1666                 : 
 1479 peter                    1667 CBC         242 :         PG_FREE_IF_COPY(arg1, 0);
                               1668             242 :         PG_FREE_IF_COPY(arg2, 1);
 4464 tgl                      1669 ECB             :     }
 8011 tgl                      1670 EUB             : 
 8011 tgl                      1671 GIC     3610619 :     PG_RETURN_BOOL(result);
                               1672                 : }
 8011 tgl                      1673 ECB             : 
                               1674                 : Datum
 8011 tgl                      1675 GIC        9726 : textne(PG_FUNCTION_ARGS)
 8011 tgl                      1676 ECB             : {
 1479 peter                    1677 CBC        9726 :     Oid         collid = PG_GET_COLLATION();
  444 peter                    1678 GIC        9726 :     bool        locale_is_c = false;
  332 tgl                      1679 CBC        9726 :     pg_locale_t mylocale = 0;
 8011 tgl                      1680 ECB             :     bool        result;
                               1681                 : 
 1479 peter                    1682 GIC        9726 :     check_collation_set(collid);
 1479 peter                    1683 ECB             : 
  444 peter                    1684 GIC        9726 :     if (lc_collate_is_c(collid))
                               1685               9 :         locale_is_c = true;
                               1686                 :     else
  444 peter                    1687 CBC        9717 :         mylocale = pg_newlocale_from_collation(collid);
                               1688                 : 
   45 jdavis                   1689 GNC        9726 :     if (locale_is_c || pg_locale_deterministic(mylocale))
 1479 peter                    1690 CBC        9714 :     {
 1479 peter                    1691 GIC        9714 :         Datum       arg1 = PG_GETARG_DATUM(0);
                               1692            9714 :         Datum       arg2 = PG_GETARG_DATUM(1);
 1479 peter                    1693 ECB             :         Size        len1,
                               1694                 :                     len2;
                               1695                 : 
                               1696                 :         /* See comment in texteq() */
 1479 peter                    1697 GIC        9714 :         len1 = toast_raw_datum_size(arg1);
 1479 peter                    1698 CBC        9714 :         len2 = toast_raw_datum_size(arg2);
 1479 peter                    1699 GIC        9714 :         if (len1 != len2)
                               1700             917 :             result = true;
                               1701                 :         else
 1479 peter                    1702 ECB             :         {
 1479 peter                    1703 GIC        8797 :             text       *targ1 = DatumGetTextPP(arg1);
 1479 peter                    1704 CBC        8797 :             text       *targ2 = DatumGetTextPP(arg2);
 1479 peter                    1705 ECB             : 
 1479 peter                    1706 GIC        8797 :             result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
                               1707                 :                              len1 - VARHDRSZ) != 0);
 1479 peter                    1708 ECB             : 
 1479 peter                    1709 GIC        8797 :             PG_FREE_IF_COPY(targ1, 0);
                               1710            8797 :             PG_FREE_IF_COPY(targ2, 1);
 1479 peter                    1711 ECB             :         }
                               1712                 :     }
 8011 tgl                      1713                 :     else
                               1714                 :     {
 1479 peter                    1715 CBC          12 :         text       *arg1 = PG_GETARG_TEXT_PP(0);
 1479 peter                    1716 GIC          12 :         text       *arg2 = PG_GETARG_TEXT_PP(1);
                               1717                 : 
                               1718              12 :         result = (text_cmp(arg1, arg2, collid) != 0);
                               1719                 : 
                               1720              12 :         PG_FREE_IF_COPY(arg1, 0);
                               1721              12 :         PG_FREE_IF_COPY(arg2, 1);
                               1722                 :     }
                               1723                 : 
 8011 tgl                      1724            9726 :     PG_RETURN_BOOL(result);
                               1725                 : }
                               1726                 : 
                               1727                 : Datum
 8312                          1728           62299 : text_lt(PG_FUNCTION_ARGS)
 9063 bruce                    1729 ECB             : {
 5847 tgl                      1730 GIC       62299 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
 5847 tgl                      1731 CBC       62299 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
 8306 tgl                      1732 ECB             :     bool        result;
                               1733                 : 
 4443 peter_e                  1734 CBC       62299 :     result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0);
                               1735                 : 
 8306 tgl                      1736           62284 :     PG_FREE_IF_COPY(arg1, 0);
 8306 tgl                      1737 GIC       62284 :     PG_FREE_IF_COPY(arg2, 1);
                               1738                 : 
                               1739           62284 :     PG_RETURN_BOOL(result);
                               1740                 : }
                               1741                 : 
                               1742                 : Datum
 8312                          1743          162176 : text_le(PG_FUNCTION_ARGS)
                               1744                 : {
 5847                          1745          162176 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
                               1746          162176 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
                               1747                 :     bool        result;
                               1748                 : 
 4443 peter_e                  1749          162176 :     result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) <= 0);
                               1750                 : 
 8306 tgl                      1751 CBC      162176 :     PG_FREE_IF_COPY(arg1, 0);
 8306 tgl                      1752 GIC      162176 :     PG_FREE_IF_COPY(arg2, 1);
 9770 scrappy                  1753 ECB             : 
 8306 tgl                      1754 CBC      162176 :     PG_RETURN_BOOL(result);
 8312 tgl                      1755 ECB             : }
                               1756                 : 
                               1757                 : Datum
 8312 tgl                      1758 GIC       56829 : text_gt(PG_FUNCTION_ARGS)
 9770 scrappy                  1759 ECB             : {
 5847 tgl                      1760 GIC       56829 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
                               1761           56829 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
 8306 tgl                      1762 ECB             :     bool        result;
                               1763                 : 
 4443 peter_e                  1764 CBC       56829 :     result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0);
                               1765                 : 
 8306 tgl                      1766 GIC       56829 :     PG_FREE_IF_COPY(arg1, 0);
                               1767           56829 :     PG_FREE_IF_COPY(arg2, 1);
                               1768                 : 
                               1769           56829 :     PG_RETURN_BOOL(result);
                               1770                 : }
                               1771                 : 
                               1772                 : Datum
 8312 tgl                      1773 CBC       92446 : text_ge(PG_FUNCTION_ARGS)
                               1774                 : {
 5847 tgl                      1775 GIC       92446 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
                               1776           92446 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
                               1777                 :     bool        result;
 8306 tgl                      1778 ECB             : 
 4443 peter_e                  1779 GIC       92446 :     result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) >= 0);
 8306 tgl                      1780 EUB             : 
 8306 tgl                      1781 GIC       92446 :     PG_FREE_IF_COPY(arg1, 0);
 8306 tgl                      1782 GBC       92446 :     PG_FREE_IF_COPY(arg2, 1);
                               1783                 : 
 8306 tgl                      1784 GIC       92446 :     PG_RETURN_BOOL(result);
 9770 scrappy                  1785 ECB             : }
                               1786                 : 
                               1787                 : Datum
 1832 teodor                   1788 GIC       18957 : text_starts_with(PG_FUNCTION_ARGS)
                               1789                 : {
                               1790           18957 :     Datum       arg1 = PG_GETARG_DATUM(0);
                               1791           18957 :     Datum       arg2 = PG_GETARG_DATUM(1);
 1479 peter                    1792           18957 :     Oid         collid = PG_GET_COLLATION();
 1418 tgl                      1793           18957 :     pg_locale_t mylocale = 0;
                               1794                 :     bool        result;
                               1795                 :     Size        len1,
                               1796                 :                 len2;
                               1797                 : 
 1479 peter                    1798           18957 :     check_collation_set(collid);
                               1799                 : 
  444                          1800           18957 :     if (!lc_collate_is_c(collid))
 1479                          1801           18957 :         mylocale = pg_newlocale_from_collation(collid);
                               1802                 : 
   45 jdavis                   1803 GNC       18957 :     if (!pg_locale_deterministic(mylocale))
 1479 peter                    1804 LBC           0 :         ereport(ERROR,
                               1805                 :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                               1806                 :                  errmsg("nondeterministic collations are not supported for substring searches")));
                               1807                 : 
 1832 teodor                   1808 GIC       18957 :     len1 = toast_raw_datum_size(arg1);
                               1809           18957 :     len2 = toast_raw_datum_size(arg2);
                               1810           18957 :     if (len2 > len1)
 1832 teodor                   1811 UIC           0 :         result = false;
                               1812                 :     else
 1832 teodor                   1813 ECB             :     {
 1468 sfrost                   1814 GIC       18957 :         text       *targ1 = text_substring(arg1, 1, len2, false);
 1832 teodor                   1815 CBC       18957 :         text       *targ2 = DatumGetTextPP(arg2);
 1832 teodor                   1816 ECB             : 
 1832 teodor                   1817 CBC       18957 :         result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
                               1818           18957 :                          VARSIZE_ANY_EXHDR(targ2)) == 0);
 1832 teodor                   1819 ECB             : 
 1832 teodor                   1820 GIC       18957 :         PG_FREE_IF_COPY(targ1, 0);
 1832 teodor                   1821 CBC       18957 :         PG_FREE_IF_COPY(targ2, 1);
 1832 teodor                   1822 ECB             :     }
                               1823                 : 
 1832 teodor                   1824 CBC       18957 :     PG_RETURN_BOOL(result);
 1832 teodor                   1825 ECB             : }
                               1826                 : 
                               1827                 : Datum
 8011 tgl                      1828 GIC     3316584 : bttextcmp(PG_FUNCTION_ARGS)
                               1829                 : {
 5847                          1830         3316584 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
                               1831         3316584 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
                               1832                 :     int32       result;
                               1833                 : 
 4443 peter_e                  1834         3316584 :     result = text_cmp(arg1, arg2, PG_GET_COLLATION());
                               1835                 : 
 8011 tgl                      1836         3316584 :     PG_FREE_IF_COPY(arg1, 0);
                               1837         3316584 :     PG_FREE_IF_COPY(arg2, 1);
                               1838                 : 
                               1839         3316584 :     PG_RETURN_INT32(result);
                               1840                 : }
 8011 tgl                      1841 ECB             : 
 3160 rhaas                    1842                 : Datum
 3160 rhaas                    1843 CBC       44359 : bttextsortsupport(PG_FUNCTION_ARGS)
 3160 rhaas                    1844 ECB             : {
 2878 bruce                    1845 GIC       44359 :     SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
                               1846           44359 :     Oid         collid = ssup->ssup_collation;
                               1847                 :     MemoryContext oldcontext;
                               1848                 : 
 3160 rhaas                    1849           44359 :     oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
                               1850                 : 
 2622 rhaas                    1851 ECB             :     /* Use generic string SortSupport */
 1572 tgl                      1852 GIC       44359 :     varstr_sortsupport(ssup, TEXTOID, collid);
 3160 rhaas                    1853 ECB             : 
 3160 rhaas                    1854 CBC       44350 :     MemoryContextSwitchTo(oldcontext);
 3160 rhaas                    1855 ECB             : 
 3160 rhaas                    1856 CBC       44350 :     PG_RETURN_VOID();
 3160 rhaas                    1857 ECB             : }
                               1858                 : 
 2622                          1859                 : /*
                               1860                 :  * Generic sortsupport interface for character type's operator classes.
                               1861                 :  * Includes locale support, and support for BpChar semantics (i.e. removing
                               1862                 :  * trailing spaces before comparison).
                               1863                 :  *
                               1864                 :  * Relies on the assumption that text, VarChar, BpChar, and bytea all have the
                               1865                 :  * same representation.  Callers that always use the C collation (e.g.
                               1866                 :  * non-collatable type callers like bytea) may have NUL bytes in their strings;
                               1867                 :  * this will not work with any other collation, though.
                               1868                 :  */
                               1869                 : void
 1550 tgl                      1870 CBC       83052 : varstr_sortsupport(SortSupport ssup, Oid typid, Oid collid)
 3160 rhaas                    1871 ECB             : {
 2878 bruce                    1872 GIC       83052 :     bool        abbreviate = ssup->abbreviate;
                               1873           83052 :     bool        collate_c = false;
                               1874                 :     VarStringSortSupport *sss;
                               1875           83052 :     pg_locale_t locale = 0;
                               1876                 : 
 1479 peter                    1877           83052 :     check_collation_set(collid);
 1479 peter                    1878 ECB             : 
 3160 rhaas                    1879                 :     /*
                               1880                 :      * If possible, set ssup->comparator to a function which can be used to
 2999                          1881                 :      * directly compare two datums.  If we can do this, we'll avoid the
 2878 bruce                    1882                 :      * overhead of a trip through the fmgr layer for every comparison, which
                               1883                 :      * can be substantial.
 3002 rhaas                    1884                 :      *
 1572 tgl                      1885                 :      * Most typically, we'll set the comparator to varlenafastcmp_locale,
                               1886                 :      * which uses strcoll() to perform comparisons.  We use that for the
                               1887                 :      * BpChar case too, but type NAME uses namefastcmp_locale. However, if
                               1888                 :      * LC_COLLATE = C, we can make things quite a bit faster with
                               1889                 :      * varstrfastcmp_c, bpcharfastcmp_c, or namefastcmp_c, all of which use
 1572 tgl                      1890 EUB             :      * memcmp() rather than strcoll().
 3160 rhaas                    1891 ECB             :      */
 2999 rhaas                    1892 GBC       83043 :     if (lc_collate_is_c(collid))
                               1893                 :     {
 1550 tgl                      1894 CBC       56902 :         if (typid == BPCHAROID)
 2622 rhaas                    1895 GIC          11 :             ssup->comparator = bpcharfastcmp_c;
 1550 tgl                      1896           56891 :         else if (typid == NAMEOID)
                               1897                 :         {
 1572                          1898           38227 :             ssup->comparator = namefastcmp_c;
                               1899                 :             /* Not supporting abbreviation with type NAME, for now */
                               1900           38227 :             abbreviate = false;
                               1901                 :         }
                               1902                 :         else
                               1903           18664 :             ssup->comparator = varstrfastcmp_c;
                               1904                 : 
 2999 rhaas                    1905 CBC       56902 :         collate_c = true;
                               1906                 :     }
 2999 rhaas                    1907 ECB             :     else
 3160                          1908                 :     {
                               1909                 :         /*
                               1910                 :          * We need a collation-sensitive comparison.  To make things faster,
                               1911                 :          * we'll figure out the collation based on the locale id and cache the
                               1912                 :          * result.
                               1913                 :          */
  444 peter                    1914 GIC       26141 :         locale = pg_newlocale_from_collation(collid);
 2023 peter_e                  1915 ECB             : 
                               1916                 :         /*
 1572 tgl                      1917                 :          * We use varlenafastcmp_locale except for type NAME.
                               1918                 :          */
 1550 tgl                      1919 GIC       26141 :         if (typid == NAMEOID)
                               1920                 :         {
 1572 tgl                      1921 UIC           0 :             ssup->comparator = namefastcmp_locale;
                               1922                 :             /* Not supporting abbreviation with type NAME, for now */
                               1923               0 :             abbreviate = false;
 1572 tgl                      1924 ECB             :         }
                               1925                 :         else
 1572 tgl                      1926 CBC       26141 :             ssup->comparator = varlenafastcmp_locale;
 3160 rhaas                    1927 ECB             :     }
                               1928                 : 
 3002                          1929                 :     /*
                               1930                 :      * Unfortunately, it seems that abbreviation for non-C collations is
                               1931                 :      * broken on many common platforms; see pg_strxfrm_enabled().
 2573                          1932                 :      *
                               1933                 :      * Even apart from the risk of broken locales, it's possible that there
                               1934                 :      * are platforms where the use of abbreviated keys should be disabled at
                               1935                 :      * compile time.  Having only 4 byte datums could make worst-case
                               1936                 :      * performance drastically more likely, for example.  Moreover, macOS's
                               1937                 :      * strxfrm() implementation is known to not effectively concentrate a
                               1938                 :      * significant amount of entropy from the original string in earlier
                               1939                 :      * transformed blobs.  It's possible that other supported platforms are
                               1940                 :      * similarly encumbered.  So, if we ever get past disabling this
                               1941                 :      * categorically, we may still want or need to disable it for particular
                               1942                 :      * platforms.
 3002                          1943                 :      */
   45 jdavis                   1944 GNC       83043 :     if (!collate_c && !pg_strxfrm_enabled(locale))
 2573 rhaas                    1945 CBC         105 :         abbreviate = false;
                               1946                 : 
 2999 rhaas                    1947 ECB             :     /*
                               1948                 :      * If we're using abbreviated keys, or if we're using a locale-aware
 1335 michael                  1949                 :      * comparison, we need to initialize a VarStringSortSupport object. Both
 2622 rhaas                    1950                 :      * cases will make use of the temporary buffers we initialize here for
                               1951                 :      * scratch space (and to detect requirement for BpChar semantics from
                               1952                 :      * caller), and the abbreviation case requires additional state.
                               1953                 :      */
 2999 rhaas                    1954 GIC       83043 :     if (abbreviate || !collate_c)
                               1955                 :     {
 2617 tgl                      1956           27973 :         sss = palloc(sizeof(VarStringSortSupport));
 2622 rhaas                    1957           27973 :         sss->buf1 = palloc(TEXTBUFLEN);
                               1958           27973 :         sss->buflen1 = TEXTBUFLEN;
 2622 rhaas                    1959 GBC       27973 :         sss->buf2 = palloc(TEXTBUFLEN);
 2622 rhaas                    1960 GIC       27973 :         sss->buflen2 = TEXTBUFLEN;
 2739 rhaas                    1961 EUB             :         /* Start with invalid values */
 2622 rhaas                    1962 GBC       27973 :         sss->last_len1 = -1;
 2622 rhaas                    1963 GIC       27973 :         sss->last_len2 = -1;
 2728 rhaas                    1964 EUB             :         /* Initialize */
 2622 rhaas                    1965 GBC       27973 :         sss->last_returned = 0;
 2622 rhaas                    1966 GIC       27973 :         sss->locale = locale;
                               1967                 : 
                               1968                 :         /*
                               1969                 :          * To avoid somehow confusing a strxfrm() blob and an original string,
                               1970                 :          * constantly keep track of the variety of data that buf1 and buf2
                               1971                 :          * currently contain.
                               1972                 :          *
 2728 rhaas                    1973 ECB             :          * Comparisons may be interleaved with conversion calls.  Frequently,
                               1974                 :          * conversions and comparisons are batched into two distinct phases,
                               1975                 :          * but the correctness of caching cannot hinge upon this.  For
                               1976                 :          * comparison caching, buffer state is only trusted if cache_blob is
                               1977                 :          * found set to false, whereas strxfrm() caching only trusts the state
                               1978                 :          * when cache_blob is found set to true.
                               1979                 :          *
                               1980                 :          * Arbitrarily initialize cache_blob to true.
                               1981                 :          */
 2622 rhaas                    1982 GIC       27973 :         sss->cache_blob = true;
                               1983           27973 :         sss->collate_c = collate_c;
 1550 tgl                      1984           27973 :         sss->typid = typid;
 2622 rhaas                    1985           27973 :         ssup->ssup_extra = sss;
                               1986                 : 
                               1987                 :         /*
                               1988                 :          * If possible, plan to use the abbreviated keys optimization.  The
                               1989                 :          * core code may switch back to authoritative comparator should
                               1990                 :          * abbreviation be aborted.
                               1991                 :          */
 2999                          1992           27973 :         if (abbreviate)
                               1993                 :         {
 2622                          1994           24109 :             sss->prop_card = 0.20;
 2622 rhaas                    1995 CBC       24109 :             initHyperLogLog(&sss->abbr_card, 10);
 2622 rhaas                    1996 GIC       24109 :             initHyperLogLog(&sss->full_card, 10);
 2999                          1997           24109 :             ssup->abbrev_full_comparator = ssup->comparator;
  372 john.naylor              1998 CBC       24109 :             ssup->comparator = ssup_datum_unsigned_cmp;
 2622 rhaas                    1999 GIC       24109 :             ssup->abbrev_converter = varstr_abbrev_convert;
                               2000           24109 :             ssup->abbrev_abort = varstr_abbrev_abort;
 2999 rhaas                    2001 ECB             :         }
                               2002                 :     }
 3160 rhaas                    2003 GIC       83043 : }
                               2004                 : 
 3160 rhaas                    2005 ECB             : /*
                               2006                 :  * sortsupport comparison func (for C locale case)
 3160 rhaas                    2007 EUB             :  */
                               2008                 : static int
 2622 rhaas                    2009 GIC    63929703 : varstrfastcmp_c(Datum x, Datum y, SortSupport ssup)
 3160 rhaas                    2010 ECB             : {
 2617 tgl                      2011 GIC    63929703 :     VarString  *arg1 = DatumGetVarStringPP(x);
 2617 tgl                      2012 CBC    63929703 :     VarString  *arg2 = DatumGetVarStringPP(y);
 3160 rhaas                    2013 ECB             :     char       *a1p,
                               2014                 :                *a2p;
                               2015                 :     int         len1,
                               2016                 :                 len2,
                               2017                 :                 result;
                               2018                 : 
 3160 rhaas                    2019 GIC    63929703 :     a1p = VARDATA_ANY(arg1);
                               2020        63929703 :     a2p = VARDATA_ANY(arg2);
                               2021                 : 
                               2022        63929703 :     len1 = VARSIZE_ANY_EXHDR(arg1);
                               2023        63929703 :     len2 = VARSIZE_ANY_EXHDR(arg2);
                               2024                 : 
                               2025        63929703 :     result = memcmp(a1p, a2p, Min(len1, len2));
 3160 rhaas                    2026 CBC    63929703 :     if ((result == 0) && (len1 != len2))
                               2027         1846743 :         result = (len1 < len2) ? -1 : 1;
                               2028                 : 
 3160 rhaas                    2029 ECB             :     /* We can't afford to leak memory here. */
 3160 rhaas                    2030 CBC    63929703 :     if (PointerGetDatum(arg1) != x)
 3160 rhaas                    2031 LBC           0 :         pfree(arg1);
 3160 rhaas                    2032 CBC    63929703 :     if (PointerGetDatum(arg2) != y)
 3160 rhaas                    2033 UIC           0 :         pfree(arg2);
                               2034                 : 
 3160 rhaas                    2035 GIC    63929703 :     return result;
                               2036                 : }
                               2037                 : 
                               2038                 : /*
                               2039                 :  * sortsupport comparison func (for BpChar C locale case)
                               2040                 :  *
 2622 rhaas                    2041 ECB             :  * BpChar outsources its sortsupport to this module.  Specialization for the
                               2042                 :  * varstr_sortsupport BpChar case, modeled on
                               2043                 :  * internal_bpchar_pattern_compare().
                               2044                 :  */
                               2045                 : static int
 2622 rhaas                    2046 GIC           8 : bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup)
 2622 rhaas                    2047 ECB             : {
 2622 rhaas                    2048 GIC           8 :     BpChar     *arg1 = DatumGetBpCharPP(x);
                               2049               8 :     BpChar     *arg2 = DatumGetBpCharPP(y);
 2622 rhaas                    2050 ECB             :     char       *a1p,
                               2051                 :                *a2p;
                               2052                 :     int         len1,
                               2053                 :                 len2,
                               2054                 :                 result;
                               2055                 : 
 2622 rhaas                    2056 CBC           8 :     a1p = VARDATA_ANY(arg1);
 2622 rhaas                    2057 GBC           8 :     a2p = VARDATA_ANY(arg2);
                               2058                 : 
 2622 rhaas                    2059 GIC           8 :     len1 = bpchartruelen(a1p, VARSIZE_ANY_EXHDR(arg1));
 2622 rhaas                    2060 CBC           8 :     len2 = bpchartruelen(a2p, VARSIZE_ANY_EXHDR(arg2));
 2622 rhaas                    2061 ECB             : 
 2622 rhaas                    2062 CBC           8 :     result = memcmp(a1p, a2p, Min(len1, len2));
 2622 rhaas                    2063 GIC           8 :     if ((result == 0) && (len1 != len2))
 2622 rhaas                    2064 UIC           0 :         result = (len1 < len2) ? -1 : 1;
                               2065                 : 
                               2066                 :     /* We can't afford to leak memory here. */
 2622 rhaas                    2067 GIC           8 :     if (PointerGetDatum(arg1) != x)
 2622 rhaas                    2068 UIC           0 :         pfree(arg1);
 2622 rhaas                    2069 GIC           8 :     if (PointerGetDatum(arg2) != y)
 2622 rhaas                    2070 UIC           0 :         pfree(arg2);
                               2071                 : 
 2622 rhaas                    2072 GIC           8 :     return result;
 2622 rhaas                    2073 ECB             : }
                               2074                 : 
 3160                          2075                 : /*
 1572 tgl                      2076                 :  * sortsupport comparison func (for NAME C locale case)
                               2077                 :  */
                               2078                 : static int
 1572 tgl                      2079 GIC    63594147 : namefastcmp_c(Datum x, Datum y, SortSupport ssup)
                               2080                 : {
                               2081        63594147 :     Name        arg1 = DatumGetName(x);
                               2082        63594147 :     Name        arg2 = DatumGetName(y);
                               2083                 : 
                               2084        63594147 :     return strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN);
                               2085                 : }
 1572 tgl                      2086 ECB             : 
                               2087                 : /*
                               2088                 :  * sortsupport comparison func (for locale case with all varlena types)
 3160 rhaas                    2089                 :  */
                               2090                 : static int
 1572 tgl                      2091 GIC    18640117 : varlenafastcmp_locale(Datum x, Datum y, SortSupport ssup)
 3160 rhaas                    2092 ECB             : {
 2617 tgl                      2093 CBC    18640117 :     VarString  *arg1 = DatumGetVarStringPP(x);
 2617 tgl                      2094 GIC    18640117 :     VarString  *arg2 = DatumGetVarStringPP(y);
                               2095                 :     char       *a1p,
                               2096                 :                *a2p;
                               2097                 :     int         len1,
                               2098                 :                 len2,
                               2099                 :                 result;
                               2100                 : 
 3160 rhaas                    2101        18640117 :     a1p = VARDATA_ANY(arg1);
                               2102        18640117 :     a2p = VARDATA_ANY(arg2);
                               2103                 : 
                               2104        18640117 :     len1 = VARSIZE_ANY_EXHDR(arg1);
                               2105        18640117 :     len2 = VARSIZE_ANY_EXHDR(arg2);
                               2106                 : 
 1572 tgl                      2107        18640117 :     result = varstrfastcmp_locale(a1p, len1, a2p, len2, ssup);
                               2108                 : 
                               2109                 :     /* We can't afford to leak memory here. */
                               2110        18640117 :     if (PointerGetDatum(arg1) != x)
                               2111               3 :         pfree(arg1);
                               2112        18640117 :     if (PointerGetDatum(arg2) != y)
                               2113               3 :         pfree(arg2);
                               2114                 : 
                               2115        18640117 :     return result;
                               2116                 : }
                               2117                 : 
                               2118                 : /*
                               2119                 :  * sortsupport comparison func (for locale case with NAME type)
                               2120                 :  */
                               2121                 : static int
 1572 tgl                      2122 LBC           0 : namefastcmp_locale(Datum x, Datum y, SortSupport ssup)
 1572 tgl                      2123 ECB             : {
 1572 tgl                      2124 UIC           0 :     Name        arg1 = DatumGetName(x);
                               2125               0 :     Name        arg2 = DatumGetName(y);
                               2126                 : 
                               2127               0 :     return varstrfastcmp_locale(NameStr(*arg1), strlen(NameStr(*arg1)),
                               2128               0 :                                 NameStr(*arg2), strlen(NameStr(*arg2)),
                               2129                 :                                 ssup);
                               2130                 : }
                               2131                 : 
                               2132                 : /*
                               2133                 :  * sortsupport comparison func for locale cases
 1572 tgl                      2134 ECB             :  */
                               2135                 : static int
 1572 tgl                      2136 CBC    18640117 : varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup)
 1572 tgl                      2137 ECB             : {
 1572 tgl                      2138 GIC    18640117 :     VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra;
                               2139                 :     int         result;
                               2140                 :     bool        arg1_match;
 1572 tgl                      2141 ECB             : 
 3124 rhaas                    2142                 :     /* Fast pre-check for equality, as discussed in varstr_cmp() */
 3124 rhaas                    2143 GIC    18640117 :     if (len1 == len2 && memcmp(a1p, a2p, len1) == 0)
 3124 rhaas                    2144 ECB             :     {
                               2145                 :         /*
 2739                          2146                 :          * No change in buf1 or buf2 contents, so avoid changing last_len1 or
                               2147                 :          * last_len2.  Existing contents of buffers might still be used by
                               2148                 :          * next call.
 2622                          2149                 :          *
                               2150                 :          * It's fine to allow the comparison of BpChar padding bytes here,
                               2151                 :          * even though that implies that the memcmp() will usually be
                               2152                 :          * performed for BpChar callers (though multibyte characters could
                               2153                 :          * still prevent that from occurring).  The memcmp() is still very
                               2154                 :          * cheap, and BpChar's funny semantics have us remove trailing spaces
 2617 tgl                      2155                 :          * (not limited to padding), so we need make no distinction between
                               2156                 :          * padding space characters and "real" space characters.
                               2157                 :          */
 1572 tgl                      2158 CBC     6879216 :         return 0;
                               2159                 :     }
 3124 rhaas                    2160 ECB             : 
 1550 tgl                      2161 GIC    11760901 :     if (sss->typid == BPCHAROID)
 3160 rhaas                    2162 EUB             :     {
                               2163                 :         /* Get true number of bytes, ignoring trailing spaces */
 2622 rhaas                    2164 GBC       16919 :         len1 = bpchartruelen(a1p, len1);
 2622 rhaas                    2165 GIC       16919 :         len2 = bpchartruelen(a2p, len2);
                               2166                 :     }
 2622 rhaas                    2167 ECB             : 
 2622 rhaas                    2168 GIC    11760901 :     if (len1 >= sss->buflen1)
 3160 rhaas                    2169 ECB             :     {
 2622 rhaas                    2170 UIC           0 :         sss->buflen1 = Max(len1 + 1, Min(sss->buflen1 * 2, MaxAllocSize));
  238 tgl                      2171               0 :         sss->buf1 = repalloc(sss->buf1, sss->buflen1);
                               2172                 :     }
 2622 rhaas                    2173 GIC    11760901 :     if (len2 >= sss->buflen2)
                               2174                 :     {
                               2175               3 :         sss->buflen2 = Max(len2 + 1, Min(sss->buflen2 * 2, MaxAllocSize));
  238 tgl                      2176               3 :         sss->buf2 = repalloc(sss->buf2, sss->buflen2);
                               2177                 :     }
                               2178                 : 
                               2179                 :     /*
                               2180                 :      * We're likely to be asked to compare the same strings repeatedly, and
                               2181                 :      * memcmp() is so much cheaper than strcoll() that it pays to try to cache
 2739 rhaas                    2182 EUB             :      * comparisons, even though in general there is no reason to think that
                               2183                 :      * that will work out (every string datum may be unique).  Caching does
                               2184                 :      * not slow things down measurably when it doesn't work out, and can speed
                               2185                 :      * things up by rather a lot when it does.  In part, this is because the
                               2186                 :      * memcmp() compares data from cachelines that are needed in L1 cache even
                               2187                 :      * when the last comparison's result cannot be reused.
                               2188                 :      */
 2739 rhaas                    2189 GIC    11760901 :     arg1_match = true;
 2622                          2190        11760901 :     if (len1 != sss->last_len1 || memcmp(sss->buf1, a1p, len1) != 0)
                               2191                 :     {
 2739 rhaas                    2192 GBC    10502281 :         arg1_match = false;
 2622 rhaas                    2193 GIC    10502281 :         memcpy(sss->buf1, a1p, len1);
 2622 rhaas                    2194 GBC    10502281 :         sss->buf1[len1] = '\0';
 2622 rhaas                    2195 GIC    10502281 :         sss->last_len1 = len1;
                               2196                 :     }
                               2197                 : 
                               2198                 :     /*
                               2199                 :      * If we're comparing the same two strings as last time, we can return the
                               2200                 :      * same answer without calling strcoll() again.  This is more likely than
                               2201                 :      * it seems (at least with moderate to low cardinality sets), because
                               2202                 :      * quicksort compares the same pivot against many values.
                               2203                 :      */
                               2204        11760901 :     if (len2 != sss->last_len2 || memcmp(sss->buf2, a2p, len2) != 0)
                               2205                 :     {
                               2206         1944023 :         memcpy(sss->buf2, a2p, len2);
 2622 rhaas                    2207 CBC     1944023 :         sss->buf2[len2] = '\0';
 2622 rhaas                    2208 GIC     1944023 :         sss->last_len2 = len2;
                               2209                 :     }
                               2210         9816878 :     else if (arg1_match && !sss->cache_blob)
                               2211                 :     {
                               2212                 :         /* Use result cached following last actual strcoll() call */
 1572 tgl                      2213         1067944 :         return sss->last_returned;
                               2214                 :     }
                               2215                 : 
   45 jdavis                   2216 GNC    10692957 :     result = pg_strcoll(sss->buf1, sss->buf2, sss->locale);
                               2217                 : 
                               2218                 :     /* Break tie if necessary. */
                               2219        10692957 :     if (result == 0 && pg_locale_deterministic(sss->locale))
 2622 rhaas                    2220 LBC           0 :         result = strcmp(sss->buf1, sss->buf2);
                               2221                 : 
                               2222                 :     /* Cache result, perhaps saving an expensive strcoll() call next time */
 2622 rhaas                    2223 GIC    10692957 :     sss->cache_blob = false;
 2622 rhaas                    2224 CBC    10692957 :     sss->last_returned = result;
 3160 rhaas                    2225 GIC    10692957 :     return result;
                               2226                 : }
 8011 tgl                      2227 ECB             : 
 3002 rhaas                    2228                 : /*
                               2229                 :  * Conversion routine for sortsupport.  Converts original to abbreviated key
 2622                          2230                 :  * representation.  Our encoding strategy is simple -- pack the first 8 bytes
                               2231                 :  * of a strxfrm() blob into a Datum (on little-endian machines, the 8 bytes are
                               2232                 :  * stored in reverse order), and treat it as an unsigned integer.  When the "C"
                               2233                 :  * locale is used, or in case of bytea, just memcpy() from original instead.
                               2234                 :  */
                               2235                 : static Datum
 2622 rhaas                    2236 GIC      577029 : varstr_abbrev_convert(Datum original, SortSupport ssup)
                               2237                 : {
   45 jdavis                   2238 GNC      577029 :     const size_t max_prefix_bytes = sizeof(Datum);
 2617 tgl                      2239 CBC      577029 :     VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra;
 2617 tgl                      2240 GBC      577029 :     VarString  *authoritative = DatumGetVarStringPP(original);
 2617 tgl                      2241 GIC      577029 :     char       *authoritative_data = VARDATA_ANY(authoritative);
 3002 rhaas                    2242 ECB             : 
 3002 rhaas                    2243 EUB             :     /* working state */
                               2244                 :     Datum       res;
                               2245                 :     char       *pres;
                               2246                 :     int         len;
                               2247                 :     uint32      hash;
                               2248                 : 
 3002 rhaas                    2249 GIC      577029 :     pres = (char *) &res;
                               2250                 :     /* memset(), so any non-overwritten bytes are NUL */
   45 jdavis                   2251 GNC      577029 :     memset(pres, 0, max_prefix_bytes);
 3002 rhaas                    2252 GIC      577029 :     len = VARSIZE_ANY_EXHDR(authoritative);
 3002 rhaas                    2253 EUB             : 
                               2254                 :     /* Get number of bytes, ignoring trailing spaces */
 1550 tgl                      2255 GBC      577029 :     if (sss->typid == BPCHAROID)
 2622 rhaas                    2256 GIC        1296 :         len = bpchartruelen(authoritative_data, len);
                               2257                 : 
                               2258                 :     /*
                               2259                 :      * If we're using the C collation, use memcpy(), rather than strxfrm(), to
                               2260                 :      * abbreviate keys.  The full comparator for the C locale is always
                               2261                 :      * memcmp().  It would be incorrect to allow bytea callers (callers that
                               2262                 :      * always force the C collation -- bytea isn't a collatable type, but this
                               2263                 :      * approach is convenient) to use strxfrm().  This is because bytea
                               2264                 :      * strings may contain NUL bytes.  Besides, this should be faster, too.
                               2265                 :      *
                               2266                 :      * More generally, it's okay that bytea callers can have NUL bytes in
                               2267                 :      * strings because abbreviated cmp need not make a distinction between
                               2268                 :      * terminating NUL bytes, and NUL bytes representing actual NULs in the
                               2269                 :      * authoritative representation.  Hopefully a comparison at or past one
                               2270                 :      * abbreviated key's terminating NUL byte will resolve the comparison
                               2271                 :      * without consulting the authoritative representation; specifically, some
                               2272                 :      * later non-NUL byte in the longer string can resolve the comparison
                               2273                 :      * against a subsequent terminating NUL in the shorter string.  There will
                               2274                 :      * usually be what is effectively a "length-wise" resolution there and
                               2275                 :      * then.
                               2276                 :      *
                               2277                 :      * If that doesn't work out -- if all bytes in the longer string
 2617 tgl                      2278 ECB             :      * positioned at or past the offset of the smaller string's (first)
                               2279                 :      * terminating NUL are actually representative of NUL bytes in the
                               2280                 :      * authoritative binary string (perhaps with some *terminating* NUL bytes
                               2281                 :      * towards the end of the longer string iff it happens to still be small)
                               2282                 :      * -- then an authoritative tie-breaker will happen, and do the right
                               2283                 :      * thing: explicitly consider string length.
                               2284                 :      */
 2622 rhaas                    2285 GIC      577029 :     if (sss->collate_c)
   45 jdavis                   2286 GNC      257041 :         memcpy(pres, authoritative_data, Min(len, max_prefix_bytes));
                               2287                 :     else
                               2288                 :     {
                               2289                 :         Size        bsize;
                               2290                 : 
                               2291                 :         /*
                               2292                 :          * We're not using the C collation, so fall back on strxfrm or ICU
                               2293                 :          * analogs.
                               2294                 :          */
                               2295                 : 
                               2296                 :         /* By convention, we use buffer 1 to store and NUL-terminate */
 2622 rhaas                    2297 GIC      319988 :         if (len >= sss->buflen1)
                               2298                 :         {
                               2299              12 :             sss->buflen1 = Max(len + 1, Min(sss->buflen1 * 2, MaxAllocSize));
  238 tgl                      2300              12 :             sss->buf1 = repalloc(sss->buf1, sss->buflen1);
 2999 rhaas                    2301 ECB             :         }
 3002                          2302                 : 
                               2303                 :         /* Might be able to reuse strxfrm() blob from last call */
 2622 rhaas                    2304 CBC      319988 :         if (sss->last_len1 == len && sss->cache_blob &&
 2622 rhaas                    2305 GIC      312042 :             memcmp(sss->buf1, authoritative_data, len) == 0)
                               2306                 :         {
   45 jdavis                   2307 GNC      153194 :             memcpy(pres, sss->buf2, Min(max_prefix_bytes, sss->last_len2));
                               2308                 :             /* No change affecting cardinality, so no hashing required */
 2739 rhaas                    2309 GIC      153194 :             goto done;
                               2310                 :         }
                               2311                 : 
 2622                          2312          166794 :         memcpy(sss->buf1, authoritative_data, len);
                               2313                 : 
                               2314                 :         /*
                               2315                 :          * pg_strxfrm() and pg_strxfrm_prefix expect NUL-terminated
                               2316                 :          * strings.
                               2317                 :          */
                               2318          166794 :         sss->buf1[len] = '\0';
 2622 rhaas                    2319 CBC      166794 :         sss->last_len1 = len;
 3002 rhaas                    2320 EUB             : 
   45 jdavis                   2321 GNC      166794 :         if (pg_strxfrm_prefix_enabled(sss->locale))
                               2322                 :         {
                               2323          166794 :             if (sss->buflen2 < max_prefix_bytes)
 2208 peter_e                  2324 ECB             :             {
   45 jdavis                   2325 UNC           0 :                 sss->buflen2 = Max(max_prefix_bytes,
                               2326                 :                                    Min(sss->buflen2 * 2, MaxAllocSize));
                               2327               0 :                 sss->buf2 = repalloc(sss->buf2, sss->buflen2);
 2208 peter_e                  2328 ECB             :             }
                               2329                 : 
   45 jdavis                   2330 GNC      166794 :             bsize = pg_strxfrm_prefix(sss->buf2, sss->buf1,
                               2331                 :                                       max_prefix_bytes, sss->locale);
   15 jdavis                   2332 CBC      166794 :             sss->last_len2 = bsize;
                               2333                 :         }
                               2334                 :         else
                               2335                 :         {
 2999 rhaas                    2336 ECB             :             /*
                               2337                 :              * Loop: Call pg_strxfrm(), possibly enlarge buffer, and try
                               2338                 :              * again.  The pg_strxfrm() function leaves the result buffer
                               2339                 :              * content undefined if the result did not fit, so we need to
                               2340                 :              * retry until everything fits, even though we only need the first
                               2341                 :              * few bytes in the end.
                               2342                 :              */
                               2343                 :             for (;;)
                               2344                 :             {
   45 jdavis                   2345 UNC           0 :                 bsize = pg_strxfrm(sss->buf2, sss->buf1, sss->buflen2,
                               2346                 :                                    sss->locale);
                               2347                 : 
                               2348               0 :                 sss->last_len2 = bsize;
                               2349               0 :                 if (bsize < sss->buflen2)
                               2350               0 :                     break;
                               2351                 : 
                               2352                 :                 /*
                               2353                 :                  * Grow buffer and retry.
                               2354                 :                  */
                               2355               0 :                 sss->buflen2 = Max(bsize + 1,
                               2356                 :                                    Min(sss->buflen2 * 2, MaxAllocSize));
                               2357               0 :                 sss->buf2 = repalloc(sss->buf2, sss->buflen2);
                               2358                 :             }
 2999 rhaas                    2359 ECB             :         }
                               2360                 : 
                               2361                 :         /*
                               2362                 :          * Every Datum byte is always compared.  This is safe because the
                               2363                 :          * strxfrm() blob is itself NUL terminated, leaving no danger of
                               2364                 :          * misinterpreting any NUL bytes not intended to be interpreted as
                               2365                 :          * logically representing termination.
 2622                          2366                 :          *
                               2367                 :          * (Actually, even if there were NUL bytes in the blob it would be
                               2368                 :          * okay.  See remarks on bytea case above.)
                               2369                 :          */
   45 jdavis                   2370 GNC      166794 :         memcpy(pres, sss->buf2, Min(max_prefix_bytes, bsize));
 3002 rhaas                    2371 ECB             :     }
                               2372                 : 
                               2373                 :     /*
                               2374                 :      * Maintain approximate cardinality of both abbreviated keys and original,
                               2375                 :      * authoritative keys using HyperLogLog.  Used as cheap insurance against
 2878 bruce                    2376                 :      * the worst case, where we do many string transformations for no saving
                               2377                 :      * in full strcoll()-based comparisons.  These statistics are used by
 2622 rhaas                    2378                 :      * varstr_abbrev_abort().
 3002                          2379                 :      *
                               2380                 :      * First, Hash key proper, or a significant fraction of it.  Mix in length
                               2381                 :      * in order to compensate for cases where differences are past
 2998                          2382                 :      * PG_CACHE_LINE_SIZE bytes, so as to limit the overhead of hashing.
 3002                          2383                 :      */
 2929 rhaas                    2384 GIC      423835 :     hash = DatumGetUInt32(hash_any((unsigned char *) authoritative_data,
                               2385                 :                                    Min(len, PG_CACHE_LINE_SIZE)));
 3002 rhaas                    2386 ECB             : 
 3002 rhaas                    2387 GIC      423835 :     if (len > PG_CACHE_LINE_SIZE)
 3002 rhaas                    2388 CBC          23 :         hash ^= DatumGetUInt32(hash_uint32((uint32) len));
                               2389                 : 
 2622 rhaas                    2390 GIC      423835 :     addHyperLogLog(&sss->full_card, hash);
                               2391                 : 
 3002 rhaas                    2392 ECB             :     /* Hash abbreviated key */
                               2393                 : #if SIZEOF_DATUM == 8
                               2394                 :     {
 2878 bruce                    2395                 :         uint32      lohalf,
                               2396                 :                     hihalf;
 3002 rhaas                    2397                 : 
 3002 rhaas                    2398 CBC      423835 :         lohalf = (uint32) res;
 3002 rhaas                    2399 GIC      423835 :         hihalf = (uint32) (res >> 32);
 2929                          2400          423835 :         hash = DatumGetUInt32(hash_uint32(lohalf ^ hihalf));
 3002 rhaas                    2401 ECB             :     }
                               2402                 : #else                           /* SIZEOF_DATUM != 8 */
 2929                          2403                 :     hash = DatumGetUInt32(hash_uint32((uint32) res));
 3002                          2404                 : #endif
 3002 rhaas                    2405 EUB             : 
 2622 rhaas                    2406 GIC      423835 :     addHyperLogLog(&sss->abbr_card, hash);
 3002 rhaas                    2407 ECB             : 
 2728                          2408                 :     /* Cache result, perhaps saving an expensive strxfrm() call next time */
 2622 rhaas                    2409 GIC      423835 :     sss->cache_blob = true;
 2739                          2410          577029 : done:
 2617 tgl                      2411 ECB             : 
                               2412                 :     /*
 2739 rhaas                    2413                 :      * Byteswap on little-endian machines.
                               2414                 :      *
                               2415                 :      * This is needed so that ssup_datum_unsigned_cmp() (an unsigned integer
                               2416                 :      * 3-way comparator) works correctly on all platforms.  If we didn't do
  332 tgl                      2417                 :      * this, the comparator would have to call memcmp() with a pair of
                               2418                 :      * pointers to the first byte of each abbreviated key, which is slower.
 2739 rhaas                    2419                 :      */
 2739 rhaas                    2420 CBC      577029 :     res = DatumBigEndianToNative(res);
 2739 rhaas                    2421 ECB             : 
 2841                          2422                 :     /* Don't leak memory here */
 2841 rhaas                    2423 CBC      577029 :     if (PointerGetDatum(authoritative) != original)
 2841 rhaas                    2424 GIC           6 :         pfree(authoritative);
                               2425                 : 
 3002 rhaas                    2426 CBC      577029 :     return res;
                               2427                 : }
 3002 rhaas                    2428 ECB             : 
 3002 rhaas                    2429 EUB             : /*
                               2430                 :  * Callback for estimating effectiveness of abbreviated key optimization, using
                               2431                 :  * heuristic rules.  Returns value indicating if the abbreviation optimization
 3002 rhaas                    2432 ECB             :  * should be aborted, based on its projected effectiveness.
                               2433                 :  */
                               2434                 : static bool
 2622 rhaas                    2435 GIC        1869 : varstr_abbrev_abort(int memtupcount, SortSupport ssup)
 3002 rhaas                    2436 ECB             : {
 2617 tgl                      2437 GIC        1869 :     VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra;
 2878 bruce                    2438 ECB             :     double      abbrev_distinct,
                               2439                 :                 key_distinct;
                               2440                 : 
 3002 rhaas                    2441 GIC        1869 :     Assert(ssup->abbreviate);
 3002 rhaas                    2442 ECB             : 
                               2443                 :     /* Have a little patience */
 2928 rhaas                    2444 CBC        1869 :     if (memtupcount < 100)
 3002                          2445            1147 :         return false;
                               2446                 : 
 2622 rhaas                    2447 GIC         722 :     abbrev_distinct = estimateHyperLogLog(&sss->abbr_card);
 2622 rhaas                    2448 CBC         722 :     key_distinct = estimateHyperLogLog(&sss->full_card);
 3002 rhaas                    2449 ECB             : 
                               2450                 :     /*
                               2451                 :      * Clamp cardinality estimates to at least one distinct value.  While
 2878 bruce                    2452                 :      * NULLs are generally disregarded, if only NULL values were seen so far,
                               2453                 :      * that might misrepresent costs if we failed to clamp.
 3002 rhaas                    2454                 :      */
 3002 rhaas                    2455 GIC         722 :     if (abbrev_distinct <= 1.0)
 3002 rhaas                    2456 UIC           0 :         abbrev_distinct = 1.0;
                               2457                 : 
 3002 rhaas                    2458 GBC         722 :     if (key_distinct <= 1.0)
 3002 rhaas                    2459 UIC           0 :         key_distinct = 1.0;
 3002 rhaas                    2460 EUB             : 
                               2461                 :     /*
                               2462                 :      * In the worst case all abbreviated keys are identical, while at the same
                               2463                 :      * time there are differences within full key strings not captured in
                               2464                 :      * abbreviations.
                               2465                 :      */
                               2466                 : #ifdef TRACE_SORT
 2924 rhaas                    2467 GIC         722 :     if (trace_sort)
 3002 rhaas                    2468 EUB             :     {
 2878 bruce                    2469 UIC           0 :         double      norm_abbrev_card = abbrev_distinct / (double) memtupcount;
 3002 rhaas                    2470 EUB             : 
 2622 rhaas                    2471 UIC           0 :         elog(LOG, "varstr_abbrev: abbrev_distinct after %d: %f "
                               2472                 :              "(key_distinct: %f, norm_abbrev_card: %f, prop_card: %f)",
                               2473                 :              memtupcount, abbrev_distinct, key_distinct, norm_abbrev_card,
                               2474                 :              sss->prop_card);
                               2475                 :     }
                               2476                 : #endif
                               2477                 : 
                               2478                 :     /*
                               2479                 :      * If the number of distinct abbreviated keys approximately matches the
 3002 rhaas                    2480 ECB             :      * number of distinct authoritative original keys, that's reason enough to
                               2481                 :      * proceed.  We can win even with a very low cardinality set if most
                               2482                 :      * tie-breakers only memcmp().  This is by far the most important
                               2483                 :      * consideration.
                               2484                 :      *
                               2485                 :      * While comparisons that are resolved at the abbreviated key level are
 3002 rhaas                    2486 EUB             :      * considerably cheaper than tie-breakers resolved with memcmp(), both of
                               2487                 :      * those two outcomes are so much cheaper than a full strcoll() once
                               2488                 :      * sorting is underway that it doesn't seem worth it to weigh abbreviated
                               2489                 :      * cardinality against the overall size of the set in order to more
                               2490                 :      * accurately model costs.  Assume that an abbreviated comparison, and an
                               2491                 :      * abbreviated comparison with a cheap memcmp()-based authoritative
                               2492                 :      * resolution are equivalent.
                               2493                 :      */
 2622 rhaas                    2494 GBC         722 :     if (abbrev_distinct > key_distinct * sss->prop_card)
                               2495                 :     {
                               2496                 :         /*
                               2497                 :          * When we have exceeded 10,000 tuples, decay required cardinality
 2928 rhaas                    2498 ECB             :          * aggressively for next call.
                               2499                 :          *
 2878 bruce                    2500                 :          * This is useful because the number of comparisons required on
                               2501                 :          * average increases at a linearithmic rate, and at roughly 10,000
                               2502                 :          * tuples that factor will start to dominate over the linear costs of
                               2503                 :          * string transformation (this is a conservative estimate).  The decay
 2878 bruce                    2504 EUB             :          * rate is chosen to be a little less aggressive than halving -- which
                               2505                 :          * (since we're called at points at which memtupcount has doubled)
                               2506                 :          * would never see the cost model actually abort past the first call
                               2507                 :          * following a decay.  This decay rate is mostly a precaution against
                               2508                 :          * a sudden, violent swing in how well abbreviated cardinality tracks
                               2509                 :          * full key cardinality.  The decay also serves to prevent a marginal
                               2510                 :          * case from being aborted too late, when too much has already been
                               2511                 :          * invested in string transformation.
 2928 rhaas                    2512                 :          *
                               2513                 :          * It's possible for sets of several million distinct strings with
                               2514                 :          * mere tens of thousands of distinct abbreviated keys to still
                               2515                 :          * benefit very significantly.  This will generally occur provided
 2878 bruce                    2516                 :          * each abbreviated key is a proxy for a roughly uniform number of the
                               2517                 :          * set's full keys. If it isn't so, we hope to catch that early and
                               2518                 :          * abort.  If it isn't caught early, by the time the problem is
                               2519                 :          * apparent it's probably not worth aborting.
                               2520                 :          */
 2928 rhaas                    2521 GIC         693 :         if (memtupcount > 10000)
 2622 rhaas                    2522 GBC           3 :             sss->prop_card *= 0.65;
                               2523                 : 
 3002                          2524             693 :         return false;
                               2525                 :     }
                               2526                 : 
                               2527                 :     /*
                               2528                 :      * Abort abbreviation strategy.
                               2529                 :      *
                               2530                 :      * The worst case, where all abbreviated keys are identical while all
                               2531                 :      * original strings differ will typically only see a regression of about
                               2532                 :      * 10% in execution time for small to medium sized lists of strings.
                               2533                 :      * Whereas on modern CPUs where cache stalls are the dominant cost, we can
                               2534                 :      * often expect very large improvements, particularly with sets of strings
                               2535                 :      * of moderately high to high abbreviated cardinality.  There is little to
                               2536                 :      * lose but much to gain, which our strategy reflects.
                               2537                 :      */
                               2538                 : #ifdef TRACE_SORT
 2924 rhaas                    2539 CBC          29 :     if (trace_sort)
 2622 rhaas                    2540 UIC           0 :         elog(LOG, "varstr_abbrev: aborted abbreviation at %d "
                               2541                 :              "(abbrev_distinct: %f, key_distinct: %f, prop_card: %f)",
                               2542                 :              memtupcount, abbrev_distinct, key_distinct, sss->prop_card);
                               2543                 : #endif
                               2544                 : 
 3002 rhaas                    2545 CBC          29 :     return true;
 3002 rhaas                    2546 ECB             : }
                               2547                 : 
 1138 pg                       2548                 : /*
                               2549                 :  * Generic equalimage support function for character type's operator classes.
                               2550                 :  * Disables the use of deduplication with nondeterministic collations.
                               2551                 :  */
 1138 pg                       2552 EUB             : Datum
 1138 pg                       2553 CBC       13033 : btvarstrequalimage(PG_FUNCTION_ARGS)
 1138 pg                       2554 ECB             : {
                               2555                 :     /* Oid      opcintype = PG_GETARG_OID(0); */
 1138 pg                       2556 CBC       13033 :     Oid         collid = PG_GET_COLLATION();
                               2557                 : 
 1138 pg                       2558 GIC       13033 :     check_collation_set(collid);
                               2559                 : 
                               2560           13033 :     if (lc_collate_is_c(collid) ||
 1138 pg                       2561 CBC          25 :         collid == DEFAULT_COLLATION_OID ||
 1138 pg                       2562 GIC          25 :         get_collation_isdeterministic(collid))
 1138 pg                       2563 CBC       13023 :         PG_RETURN_BOOL(true);
 1138 pg                       2564 ECB             :     else
 1138 pg                       2565 GIC          10 :         PG_RETURN_BOOL(false);
                               2566                 : }
 1138 pg                       2567 ECB             : 
                               2568                 : Datum
 8312 tgl                      2569 CBC      114807 : text_larger(PG_FUNCTION_ARGS)
 8888 lockhart                 2570 ECB             : {
 5847 tgl                      2571 GIC      114807 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
 5847 tgl                      2572 CBC      114807 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
                               2573                 :     text       *result;
                               2574                 : 
 4443 peter_e                  2575 GIC      114807 :     result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0) ? arg1 : arg2);
                               2576                 : 
 8312 tgl                      2577 CBC      114807 :     PG_RETURN_TEXT_P(result);
                               2578                 : }
 8888 lockhart                 2579 ECB             : 
 8312 tgl                      2580                 : Datum
 8312 tgl                      2581 GIC       43065 : text_smaller(PG_FUNCTION_ARGS)
                               2582                 : {
 5847 tgl                      2583 CBC       43065 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
 5847 tgl                      2584 GIC       43065 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
 8720 bruce                    2585 ECB             :     text       *result;
 8888 lockhart                 2586                 : 
 4443 peter_e                  2587 GIC       43065 :     result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0) ? arg1 : arg2);
 8888 lockhart                 2588 ECB             : 
 8312 tgl                      2589 GIC       43065 :     PG_RETURN_TEXT_P(result);
                               2590                 : }
                               2591                 : 
                               2592                 : 
 1572 tgl                      2593 ECB             : /*
                               2594                 :  * Cross-type comparison functions for types text and name.
                               2595                 :  */
                               2596                 : 
                               2597                 : Datum
 1572 tgl                      2598 GIC       98804 : nameeqtext(PG_FUNCTION_ARGS)
 1572 tgl                      2599 ECB             : {
 1572 tgl                      2600 GIC       98804 :     Name        arg1 = PG_GETARG_NAME(0);
 1572 tgl                      2601 CBC       98804 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
                               2602           98804 :     size_t      len1 = strlen(NameStr(*arg1));
 1572 tgl                      2603 GIC       98804 :     size_t      len2 = VARSIZE_ANY_EXHDR(arg2);
 1479 peter                    2604 CBC       98804 :     Oid         collid = PG_GET_COLLATION();
                               2605                 :     bool        result;
                               2606                 : 
 1479 peter                    2607 GIC       98804 :     check_collation_set(collid);
                               2608                 : 
 1479 peter                    2609 CBC       98804 :     if (collid == C_COLLATION_OID)
 1479 peter                    2610 GIC      148014 :         result = (len1 == len2 &&
 1479 peter                    2611 CBC       65114 :                   memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0);
 1479 peter                    2612 ECB             :     else
 1479 peter                    2613 GIC       15904 :         result = (varstr_cmp(NameStr(*arg1), len1,
                               2614           15904 :                              VARDATA_ANY(arg2), len2,
 1479 peter                    2615 ECB             :                              collid) == 0);
                               2616                 : 
 1572 tgl                      2617 CBC       98804 :     PG_FREE_IF_COPY(arg2, 1);
 1572 tgl                      2618 ECB             : 
 1572 tgl                      2619 GIC       98804 :     PG_RETURN_BOOL(result);
 1572 tgl                      2620 ECB             : }
                               2621                 : 
                               2622                 : Datum
 1572 tgl                      2623 GIC        3369 : texteqname(PG_FUNCTION_ARGS)
                               2624                 : {
 1572 tgl                      2625 CBC        3369 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
 1572 tgl                      2626 GIC        3369 :     Name        arg2 = PG_GETARG_NAME(1);
 1572 tgl                      2627 CBC        3369 :     size_t      len1 = VARSIZE_ANY_EXHDR(arg1);
                               2628            3369 :     size_t      len2 = strlen(NameStr(*arg2));
 1479 peter                    2629 GIC        3369 :     Oid         collid = PG_GET_COLLATION();
                               2630                 :     bool        result;
 1572 tgl                      2631 ECB             : 
 1479 peter                    2632 GIC        3369 :     check_collation_set(collid);
 1479 peter                    2633 ECB             : 
 1479 peter                    2634 CBC        3369 :     if (collid == C_COLLATION_OID)
 1479 peter                    2635 GIC         282 :         result = (len1 == len2 &&
 1479 peter                    2636 CBC          90 :                   memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0);
                               2637                 :     else
 1479 peter                    2638 GIC        3177 :         result = (varstr_cmp(VARDATA_ANY(arg1), len1,
                               2639            3177 :                              NameStr(*arg2), len2,
                               2640                 :                              collid) == 0);
 1572 tgl                      2641 ECB             : 
 1572 tgl                      2642 GIC        3369 :     PG_FREE_IF_COPY(arg1, 0);
 1572 tgl                      2643 ECB             : 
 1572 tgl                      2644 GIC        3369 :     PG_RETURN_BOOL(result);
                               2645                 : }
 1572 tgl                      2646 ECB             : 
                               2647                 : Datum
 1572 tgl                      2648 GIC          18 : namenetext(PG_FUNCTION_ARGS)
 1572 tgl                      2649 ECB             : {
 1572 tgl                      2650 GIC          18 :     Name        arg1 = PG_GETARG_NAME(0);
 1572 tgl                      2651 CBC          18 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
 1572 tgl                      2652 GIC          18 :     size_t      len1 = strlen(NameStr(*arg1));
 1572 tgl                      2653 CBC          18 :     size_t      len2 = VARSIZE_ANY_EXHDR(arg2);
 1479 peter                    2654 GIC          18 :     Oid         collid = PG_GET_COLLATION();
                               2655                 :     bool        result;
                               2656                 : 
                               2657              18 :     check_collation_set(collid);
                               2658                 : 
                               2659              18 :     if (collid == C_COLLATION_OID)
                               2660               9 :         result = !(len1 == len2 &&
 1479 peter                    2661 UIC           0 :                    memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0);
                               2662                 :     else
 1479 peter                    2663 GIC           9 :         result = !(varstr_cmp(NameStr(*arg1), len1,
 1479 peter                    2664 CBC           9 :                               VARDATA_ANY(arg2), len2,
                               2665                 :                               collid) == 0);
 1572 tgl                      2666 ECB             : 
 1572 tgl                      2667 GIC          18 :     PG_FREE_IF_COPY(arg2, 1);
                               2668                 : 
 1572 tgl                      2669 CBC          18 :     PG_RETURN_BOOL(result);
                               2670                 : }
                               2671                 : 
                               2672                 : Datum
 1572 tgl                      2673 GIC           9 : textnename(PG_FUNCTION_ARGS)
                               2674                 : {
                               2675               9 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
                               2676               9 :     Name        arg2 = PG_GETARG_NAME(1);
                               2677               9 :     size_t      len1 = VARSIZE_ANY_EXHDR(arg1);
                               2678               9 :     size_t      len2 = strlen(NameStr(*arg2));
 1479 peter                    2679               9 :     Oid         collid = PG_GET_COLLATION();
 1572 tgl                      2680 ECB             :     bool        result;
                               2681                 : 
 1479 peter                    2682 CBC           9 :     check_collation_set(collid);
 1479 peter                    2683 ECB             : 
 1479 peter                    2684 GIC           9 :     if (collid == C_COLLATION_OID)
 1479 peter                    2685 LBC           0 :         result = !(len1 == len2 &&
 1479 peter                    2686 UIC           0 :                    memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0);
                               2687                 :     else
 1479 peter                    2688 GIC           9 :         result = !(varstr_cmp(VARDATA_ANY(arg1), len1,
                               2689               9 :                               NameStr(*arg2), len2,
                               2690                 :                               collid) == 0);
                               2691                 : 
 1572 tgl                      2692               9 :     PG_FREE_IF_COPY(arg1, 0);
                               2693                 : 
                               2694               9 :     PG_RETURN_BOOL(result);
 1572 tgl                      2695 ECB             : }
                               2696                 : 
                               2697                 : Datum
 1572 tgl                      2698 GIC       81923 : btnametextcmp(PG_FUNCTION_ARGS)
                               2699                 : {
                               2700           81923 :     Name        arg1 = PG_GETARG_NAME(0);
                               2701           81923 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
                               2702                 :     int32       result;
 1572 tgl                      2703 ECB             : 
 1572 tgl                      2704 CBC      163846 :     result = varstr_cmp(NameStr(*arg1), strlen(NameStr(*arg1)),
 1572 tgl                      2705 GIC      163846 :                         VARDATA_ANY(arg2), VARSIZE_ANY_EXHDR(arg2),
                               2706                 :                         PG_GET_COLLATION());
 1572 tgl                      2707 ECB             : 
 1572 tgl                      2708 GBC       81923 :     PG_FREE_IF_COPY(arg2, 1);
 1572 tgl                      2709 ECB             : 
 1572 tgl                      2710 GBC       81923 :     PG_RETURN_INT32(result);
                               2711                 : }
 1572 tgl                      2712 ECB             : 
                               2713                 : Datum
 1572 tgl                      2714 UIC           0 : bttextnamecmp(PG_FUNCTION_ARGS)
                               2715                 : {
 1572 tgl                      2716 LBC           0 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
 1572 tgl                      2717 UIC           0 :     Name        arg2 = PG_GETARG_NAME(1);
                               2718                 :     int32       result;
 1572 tgl                      2719 ECB             : 
 1572 tgl                      2720 LBC           0 :     result = varstr_cmp(VARDATA_ANY(arg1), VARSIZE_ANY_EXHDR(arg1),
                               2721               0 :                         NameStr(*arg2), strlen(NameStr(*arg2)),
 1572 tgl                      2722 ECB             :                         PG_GET_COLLATION());
                               2723                 : 
 1572 tgl                      2724 UIC           0 :     PG_FREE_IF_COPY(arg1, 0);
 1572 tgl                      2725 ECB             : 
 1572 tgl                      2726 UIC           0 :     PG_RETURN_INT32(result);
                               2727                 : }
                               2728                 : 
                               2729                 : #define CmpCall(cmpfunc) \
                               2730                 :     DatumGetInt32(DirectFunctionCall2Coll(cmpfunc, \
                               2731                 :                                           PG_GET_COLLATION(), \
                               2732                 :                                           PG_GETARG_DATUM(0), \
                               2733                 :                                           PG_GETARG_DATUM(1)))
                               2734                 : 
                               2735                 : Datum
 1572 tgl                      2736 GIC       26815 : namelttext(PG_FUNCTION_ARGS)
                               2737                 : {
                               2738           26815 :     PG_RETURN_BOOL(CmpCall(btnametextcmp) < 0);
                               2739                 : }
                               2740                 : 
                               2741                 : Datum
 1572 tgl                      2742 UIC           0 : nameletext(PG_FUNCTION_ARGS)
                               2743                 : {
                               2744               0 :     PG_RETURN_BOOL(CmpCall(btnametextcmp) <= 0);
                               2745                 : }
                               2746                 : 
 1572 tgl                      2747 ECB             : Datum
 1572 tgl                      2748 UIC           0 : namegttext(PG_FUNCTION_ARGS)
 1572 tgl                      2749 ECB             : {
 1572 tgl                      2750 UIC           0 :     PG_RETURN_BOOL(CmpCall(btnametextcmp) > 0);
                               2751                 : }
                               2752                 : 
                               2753                 : Datum
 1572 tgl                      2754 GIC       25974 : namegetext(PG_FUNCTION_ARGS)
                               2755                 : {
                               2756           25974 :     PG_RETURN_BOOL(CmpCall(btnametextcmp) >= 0);
                               2757                 : }
                               2758                 : 
                               2759                 : Datum
 1572 tgl                      2760 UIC           0 : textltname(PG_FUNCTION_ARGS)
 1572 tgl                      2761 ECB             : {
 1572 tgl                      2762 UIC           0 :     PG_RETURN_BOOL(CmpCall(bttextnamecmp) < 0);
 1572 tgl                      2763 ECB             : }
                               2764                 : 
                               2765                 : Datum
 1572 tgl                      2766 UIC           0 : textlename(PG_FUNCTION_ARGS)
                               2767                 : {
                               2768               0 :     PG_RETURN_BOOL(CmpCall(bttextnamecmp) <= 0);
                               2769                 : }
 1572 tgl                      2770 ECB             : 
                               2771                 : Datum
 1572 tgl                      2772 UIC           0 : textgtname(PG_FUNCTION_ARGS)
                               2773                 : {
                               2774               0 :     PG_RETURN_BOOL(CmpCall(bttextnamecmp) > 0);
                               2775                 : }
                               2776                 : 
                               2777                 : Datum
                               2778               0 : textgename(PG_FUNCTION_ARGS)
                               2779                 : {
                               2780               0 :     PG_RETURN_BOOL(CmpCall(bttextnamecmp) >= 0);
                               2781                 : }
 1572 tgl                      2782 ECB             : 
                               2783                 : #undef CmpCall
                               2784                 : 
                               2785                 : 
                               2786                 : /*
                               2787                 :  * The following operators support character-by-character comparison
                               2788                 :  * of text datums, to allow building indexes suitable for LIKE clauses.
                               2789                 :  * Note that the regular texteq/textne comparison operators, and regular
 2622 rhaas                    2790                 :  * support functions 1 and 2 with "C" collation are assumed to be
                               2791                 :  * compatible with these!
 7269 peter_e                  2792                 :  */
                               2793                 : 
                               2794                 : static int
 1296 tgl                      2795 CBC       76040 : internal_text_pattern_compare(text *arg1, text *arg2)
                               2796                 : {
                               2797                 :     int         result;
                               2798                 :     int         len1,
                               2799                 :                 len2;
 5430 tgl                      2800 ECB             : 
 5430 tgl                      2801 GIC       76040 :     len1 = VARSIZE_ANY_EXHDR(arg1);
                               2802           76040 :     len2 = VARSIZE_ANY_EXHDR(arg2);
                               2803                 : 
 4492 rhaas                    2804           76040 :     result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
 7269 peter_e                  2805           76040 :     if (result != 0)
 7269 peter_e                  2806 CBC       76013 :         return result;
 5430 tgl                      2807 GIC          27 :     else if (len1 < len2)
 7269 peter_e                  2808 UIC           0 :         return -1;
 5430 tgl                      2809 GIC          27 :     else if (len1 > len2)
 7269 peter_e                  2810               9 :         return 1;
                               2811                 :     else
                               2812              18 :         return 0;
                               2813                 : }
                               2814                 : 
 7269 peter_e                  2815 ECB             : 
 7269 peter_e                  2816 EUB             : Datum
 7269 peter_e                  2817 GIC       19769 : text_pattern_lt(PG_FUNCTION_ARGS)
 7269 peter_e                  2818 ECB             : {
 5847 tgl                      2819 GIC       19769 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
                               2820           19769 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
                               2821                 :     int         result;
                               2822                 : 
 1296                          2823           19769 :     result = internal_text_pattern_compare(arg1, arg2);
                               2824                 : 
 7269 peter_e                  2825           19769 :     PG_FREE_IF_COPY(arg1, 0);
 7269 peter_e                  2826 CBC       19769 :     PG_FREE_IF_COPY(arg2, 1);
                               2827                 : 
 7269 peter_e                  2828 GIC       19769 :     PG_RETURN_BOOL(result < 0);
                               2829                 : }
                               2830                 : 
                               2831                 : 
                               2832                 : Datum
                               2833           18755 : text_pattern_le(PG_FUNCTION_ARGS)
                               2834                 : {
 5847 tgl                      2835           18755 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
                               2836           18755 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
 7269 peter_e                  2837 ECB             :     int         result;
                               2838                 : 
 1296 tgl                      2839 CBC       18755 :     result = internal_text_pattern_compare(arg1, arg2);
 7269 peter_e                  2840 ECB             : 
 7269 peter_e                  2841 CBC       18755 :     PG_FREE_IF_COPY(arg1, 0);
                               2842           18755 :     PG_FREE_IF_COPY(arg2, 1);
                               2843                 : 
                               2844           18755 :     PG_RETURN_BOOL(result <= 0);
                               2845                 : }
                               2846                 : 
                               2847                 : 
 7269 peter_e                  2848 ECB             : Datum
 7269 peter_e                  2849 GIC       18755 : text_pattern_ge(PG_FUNCTION_ARGS)
 7269 peter_e                  2850 ECB             : {
 5847 tgl                      2851 CBC       18755 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
                               2852           18755 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
                               2853                 :     int         result;
                               2854                 : 
 1296                          2855           18755 :     result = internal_text_pattern_compare(arg1, arg2);
 7269 peter_e                  2856 ECB             : 
 7269 peter_e                  2857 GIC       18755 :     PG_FREE_IF_COPY(arg1, 0);
                               2858           18755 :     PG_FREE_IF_COPY(arg2, 1);
                               2859                 : 
 7269 peter_e                  2860 CBC       18755 :     PG_RETURN_BOOL(result >= 0);
                               2861                 : }
                               2862                 : 
                               2863                 : 
                               2864                 : Datum
 7269 peter_e                  2865 GIC       18755 : text_pattern_gt(PG_FUNCTION_ARGS)
                               2866                 : {
 5847 tgl                      2867           18755 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
                               2868           18755 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
                               2869                 :     int         result;
                               2870                 : 
 1296                          2871           18755 :     result = internal_text_pattern_compare(arg1, arg2);
 7269 peter_e                  2872 ECB             : 
 7269 peter_e                  2873 GBC       18755 :     PG_FREE_IF_COPY(arg1, 0);
 7269 peter_e                  2874 GIC       18755 :     PG_FREE_IF_COPY(arg2, 1);
                               2875                 : 
 7269 peter_e                  2876 CBC       18755 :     PG_RETURN_BOOL(result > 0);
 7269 peter_e                  2877 EUB             : }
                               2878                 : 
                               2879                 : 
                               2880                 : Datum
 7269 peter_e                  2881 CBC           6 : bttext_pattern_cmp(PG_FUNCTION_ARGS)
 7269 peter_e                  2882 ECB             : {
 5847 tgl                      2883 CBC           6 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
                               2884               6 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
                               2885                 :     int         result;
 7269 peter_e                  2886 ECB             : 
 1296 tgl                      2887 GIC           6 :     result = internal_text_pattern_compare(arg1, arg2);
                               2888                 : 
 7269 peter_e                  2889               6 :     PG_FREE_IF_COPY(arg1, 0);
                               2890               6 :     PG_FREE_IF_COPY(arg2, 1);
                               2891                 : 
                               2892               6 :     PG_RETURN_INT32(result);
 7269 peter_e                  2893 ECB             : }
                               2894                 : 
                               2895                 : 
                               2896                 : Datum
 2622 rhaas                    2897 CBC          58 : bttext_pattern_sortsupport(PG_FUNCTION_ARGS)
                               2898                 : {
 2622 rhaas                    2899 GIC          58 :     SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
                               2900                 :     MemoryContext oldcontext;
                               2901                 : 
                               2902              58 :     oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
                               2903                 : 
                               2904                 :     /* Use generic string SortSupport, forcing "C" collation */
 1572 tgl                      2905              58 :     varstr_sortsupport(ssup, TEXTOID, C_COLLATION_OID);
                               2906                 : 
 2622 rhaas                    2907 GBC          58 :     MemoryContextSwitchTo(oldcontext);
                               2908                 : 
                               2909              58 :     PG_RETURN_VOID();
 2622 rhaas                    2910 EUB             : }
                               2911                 : 
                               2912                 : 
                               2913                 : /*-------------------------------------------------------------
                               2914                 :  * byteaoctetlen
                               2915                 :  *
                               2916                 :  * get the number of bytes contained in an instance of type 'bytea'
                               2917                 :  *-------------------------------------------------------------
                               2918                 :  */
 8289 tgl                      2919                 : Datum
 8289 tgl                      2920 GBC         157 : byteaoctetlen(PG_FUNCTION_ARGS)
                               2921                 : {
 7008                          2922             157 :     Datum       str = PG_GETARG_DATUM(0);
 7008 tgl                      2923 EUB             : 
                               2924                 :     /* We need not detoast the input at all */
 7008 tgl                      2925 GBC         157 :     PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
 9770 scrappy                  2926 EUB             : }
                               2927                 : 
 7877 bruce                    2928                 : /*
                               2929                 :  * byteacat -
                               2930                 :  *    takes two bytea* and returns a bytea* that is the concatenation of
                               2931                 :  *    the two.
                               2932                 :  *
                               2933                 :  * Cloned from textcat and modified as required.
                               2934                 :  */
                               2935                 : Datum
 7877 bruce                    2936 GIC         760 : byteacat(PG_FUNCTION_ARGS)
 7877 bruce                    2937 EUB             : {
 5847 tgl                      2938 GIC         760 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
                               2939             760 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
 4822 tgl                      2940 EUB             : 
 4822 tgl                      2941 GIC         760 :     PG_RETURN_BYTEA_P(bytea_catenate(t1, t2));
                               2942                 : }
                               2943                 : 
                               2944                 : /*
                               2945                 :  * bytea_catenate
                               2946                 :  *  Guts of byteacat(), broken out so it can be used by other functions
                               2947                 :  *
                               2948                 :  * Arguments can be in short-header form, but not compressed or out-of-line
                               2949                 :  */
                               2950                 : static bytea *
 4822 tgl                      2951 CBC         778 : bytea_catenate(bytea *t1, bytea *t2)
                               2952                 : {
 4822 tgl                      2953 ECB             :     bytea      *result;
 7877 bruce                    2954                 :     int         len1,
                               2955                 :                 len2,
                               2956                 :                 len;
                               2957                 :     char       *ptr;
                               2958                 : 
 5847 tgl                      2959 GIC         778 :     len1 = VARSIZE_ANY_EXHDR(t1);
 4822 tgl                      2960 CBC         778 :     len2 = VARSIZE_ANY_EXHDR(t2);
 4822 tgl                      2961 ECB             : 
                               2962                 :     /* paranoia ... probably should throw error instead? */
 7877 bruce                    2963 GIC         778 :     if (len1 < 0)
 7877 bruce                    2964 UIC           0 :         len1 = 0;
 7877 bruce                    2965 GIC         778 :     if (len2 < 0)
 7877 bruce                    2966 LBC           0 :         len2 = 0;
                               2967                 : 
 7877 bruce                    2968 CBC         778 :     len = len1 + len2 + VARHDRSZ;
 7877 bruce                    2969 GIC         778 :     result = (bytea *) palloc(len);
                               2970                 : 
                               2971                 :     /* Set size of result string... */
 5885 tgl                      2972             778 :     SET_VARSIZE(result, len);
                               2973                 : 
                               2974                 :     /* Fill data field of result string... */
 7877 bruce                    2975             778 :     ptr = VARDATA(result);
                               2976             778 :     if (len1 > 0)
 5847 tgl                      2977             778 :         memcpy(ptr, VARDATA_ANY(t1), len1);
 7877 bruce                    2978             778 :     if (len2 > 0)
 5847 tgl                      2979             769 :         memcpy(ptr + len1, VARDATA_ANY(t2), len2);
 7877 bruce                    2980 ECB             : 
 4822 tgl                      2981 GIC         778 :     return result;
 7877 bruce                    2982 ECB             : }
                               2983                 : 
                               2984                 : #define PG_STR_GET_BYTEA(str_) \
                               2985                 :     DatumGetByteaPP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
                               2986                 : 
                               2987                 : /*
                               2988                 :  * bytea_substr()
                               2989                 :  * Return a substring starting at the specified position.
                               2990                 :  * Cloned from text_substr and modified as required.
                               2991                 :  *
                               2992                 :  * Input:
                               2993                 :  *  - string
                               2994                 :  *  - starting position (is one-based)
                               2995                 :  *  - string length (optional)
                               2996                 :  *
                               2997                 :  * If the starting position is zero or less, then return from the start of the string
 3641 peter_e                  2998                 :  * adjusting the length to be consistent with the "negative start" per SQL.
 7535 bruce                    2999                 :  * If the length is less than zero, an ERROR is thrown. If no third argument
                               3000                 :  * (length) is provided, the length to the end of the string is assumed.
 7877                          3001                 :  */
                               3002                 : Datum
 7877 bruce                    3003 CBC          43 : bytea_substr(PG_FUNCTION_ARGS)
 7877 bruce                    3004 ECB             : {
 4822 tgl                      3005 GIC          43 :     PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
 4822 tgl                      3006 EUB             :                                       PG_GETARG_INT32(1),
                               3007                 :                                       PG_GETARG_INT32(2),
                               3008                 :                                       false));
                               3009                 : }
                               3010                 : 
                               3011                 : /*
                               3012                 :  * bytea_substr_no_len -
                               3013                 :  *    Wrapper to avoid opr_sanity failure due to
                               3014                 :  *    one function accepting a different number of args.
                               3015                 :  */
                               3016                 : Datum
 4822 tgl                      3017 GIC        1950 : bytea_substr_no_len(PG_FUNCTION_ARGS)
 4822 tgl                      3018 ECB             : {
 4822 tgl                      3019 GIC        1950 :     PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
 4822 tgl                      3020 ECB             :                                       PG_GETARG_INT32(1),
                               3021                 :                                       -1,
                               3022                 :                                       true));
                               3023                 : }
                               3024                 : 
                               3025                 : static bytea *
 4822 tgl                      3026 GIC        2011 : bytea_substring(Datum str,
 4822 tgl                      3027 ECB             :                 int S,
                               3028                 :                 int L,
                               3029                 :                 bool length_not_specified)
                               3030                 : {
                               3031                 :     int32       S1;             /* adjusted start position */
                               3032                 :     int32       L1;             /* adjusted substring length */
                               3033                 :     int32       E;              /* end position */
                               3034                 : 
                               3035                 :     /*
  825                          3036                 :      * The logic here should generally match text_substring().
                               3037                 :      */
 7535 bruce                    3038 CBC        2011 :     S1 = Max(S, 1);
                               3039                 : 
 4822 tgl                      3040 GIC        2011 :     if (length_not_specified)
                               3041                 :     {
                               3042                 :         /*
                               3043                 :          * Not passed a length - DatumGetByteaPSlice() grabs everything to the
                               3044                 :          * end of the string if we pass it a negative value for length.
                               3045                 :          */
 7535 bruce                    3046            1959 :         L1 = -1;
                               3047                 :     }
  825 tgl                      3048              52 :     else if (L < 0)
                               3049                 :     {
  825 tgl                      3050 ECB             :         /* SQL99 says to throw an error for E < S, i.e., negative length */
  825 tgl                      3051 GIC           6 :         ereport(ERROR,
  825 tgl                      3052 ECB             :                 (errcode(ERRCODE_SUBSTRING_ERROR),
                               3053                 :                  errmsg("negative substring length not allowed")));
                               3054                 :         L1 = -1;                /* silence stupider compilers */
                               3055                 :     }
  825 tgl                      3056 GIC          46 :     else if (pg_add_s32_overflow(S, L, &E))
                               3057                 :     {
                               3058                 :         /*
                               3059                 :          * L could be large enough for S + L to overflow, in which case the
                               3060                 :          * substring must run to end of string.
 7535 bruce                    3061 ECB             :          */
  825 tgl                      3062 GIC           3 :         L1 = -1;
  825 tgl                      3063 ECB             :     }
                               3064                 :     else
                               3065                 :     {
                               3066                 :         /*
                               3067                 :          * A zero or negative value for the end position can happen if the
                               3068                 :          * start was negative or one. SQL99 says to return a zero-length
                               3069                 :          * string.
 7535 bruce                    3070                 :          */
 7535 bruce                    3071 CBC          43 :         if (E < 1)
 4822 tgl                      3072 UIC           0 :             return PG_STR_GET_BYTEA("");
                               3073                 : 
 7535 bruce                    3074 GIC          43 :         L1 = E - S1;
                               3075                 :     }
 7535 bruce                    3076 ECB             : 
 7522 bruce                    3077 EUB             :     /*
                               3078                 :      * If the start position is past the end of the string, SQL99 says to
                               3079                 :      * return a zero-length string -- DatumGetByteaPSlice() will do that for
                               3080                 :      * us.  We need only convert S1 to zero-based starting position.
                               3081                 :      */
 4822 tgl                      3082 GIC        2005 :     return DatumGetByteaPSlice(str, S1 - 1, L1);
                               3083                 : }
 7535 bruce                    3084 ECB             : 
                               3085                 : /*
 4822 tgl                      3086                 :  * byteaoverlay
                               3087                 :  *  Replace specified substring of first string with second
                               3088                 :  *
 4822 tgl                      3089 EUB             :  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
                               3090                 :  * This code is a direct implementation of what the standard says.
 7535 bruce                    3091 ECB             :  */
                               3092                 : Datum
 4822 tgl                      3093 CBC           3 : byteaoverlay(PG_FUNCTION_ARGS)
                               3094                 : {
 4822 tgl                      3095 GIC           3 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
                               3096               3 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
 2118                          3097               3 :     int         sp = PG_GETARG_INT32(2);    /* substring start position */
                               3098               3 :     int         sl = PG_GETARG_INT32(3);    /* substring length */
                               3099                 : 
 4822                          3100               3 :     PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
 4822 tgl                      3101 ECB             : }
                               3102                 : 
                               3103                 : Datum
 4822 tgl                      3104 GIC           6 : byteaoverlay_no_len(PG_FUNCTION_ARGS)
                               3105                 : {
                               3106               6 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
 4822 tgl                      3107 CBC           6 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
 2118 tgl                      3108 GIC           6 :     int         sp = PG_GETARG_INT32(2);    /* substring start position */
                               3109                 :     int         sl;
 4822 tgl                      3110 ECB             : 
 4790 bruce                    3111 CBC           6 :     sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */
 4822 tgl                      3112 GIC           6 :     PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
                               3113                 : }
 4822 tgl                      3114 ECB             : 
                               3115                 : static bytea *
 4822 tgl                      3116 GIC           9 : bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
 4822 tgl                      3117 ECB             : {
                               3118                 :     bytea      *result;
                               3119                 :     bytea      *s1;
                               3120                 :     bytea      *s2;
                               3121                 :     int         sp_pl_sl;
                               3122                 : 
                               3123                 :     /*
 4790 bruce                    3124                 :      * Check for possible integer-overflow cases.  For negative sp, throw a
                               3125                 :      * "substring length" error because that's what should be expected
                               3126                 :      * according to the spec's definition of OVERLAY().
                               3127                 :      */
 4822 tgl                      3128 CBC           9 :     if (sp <= 0)
 4822 tgl                      3129 UIC           0 :         ereport(ERROR,
                               3130                 :                 (errcode(ERRCODE_SUBSTRING_ERROR),
                               3131                 :                  errmsg("negative substring length not allowed")));
 1944 andres                   3132 GIC           9 :     if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
 4822 tgl                      3133 UIC           0 :         ereport(ERROR,
                               3134                 :                 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
                               3135                 :                  errmsg("integer out of range")));
                               3136                 : 
 4790 bruce                    3137 GIC           9 :     s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false);
 4822 tgl                      3138               9 :     s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
                               3139               9 :     result = bytea_catenate(s1, t2);
                               3140               9 :     result = bytea_catenate(result, s2);
 4822 tgl                      3141 ECB             : 
 4822 tgl                      3142 GIC           9 :     return result;
                               3143                 : }
 7877 bruce                    3144 ECB             : 
                               3145                 : /*
                               3146                 :  * bit_count
                               3147                 :  */
                               3148                 : Datum
  747 peter                    3149 GIC           3 : bytea_bit_count(PG_FUNCTION_ARGS)
  747 peter                    3150 ECB             : {
  747 peter                    3151 GIC           3 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
  747 peter                    3152 ECB             : 
  747 peter                    3153 GBC           3 :     PG_RETURN_INT64(pg_popcount(VARDATA_ANY(t1), VARSIZE_ANY_EXHDR(t1)));
                               3154                 : }
                               3155                 : 
                               3156                 : /*
 7877 bruce                    3157 ECB             :  * byteapos -
 7877 bruce                    3158 EUB             :  *    Return the position of the specified substring.
                               3159                 :  *    Implements the SQL POSITION() function.
                               3160                 :  * Cloned from textpos and modified as required.
                               3161                 :  */
 7877 bruce                    3162 ECB             : Datum
 7877 bruce                    3163 UIC           0 : byteapos(PG_FUNCTION_ARGS)
 7877 bruce                    3164 ECB             : {
 5847 tgl                      3165 UIC           0 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
 5847 tgl                      3166 LBC           0 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
                               3167                 :     int         pos;
                               3168                 :     int         px,
 7877 bruce                    3169 ECB             :                 p;
                               3170                 :     int         len1,
                               3171                 :                 len2;
 7836                          3172                 :     char       *p1,
                               3173                 :                *p2;
                               3174                 : 
 5847 tgl                      3175 UIC           0 :     len1 = VARSIZE_ANY_EXHDR(t1);
                               3176               0 :     len2 = VARSIZE_ANY_EXHDR(t2);
                               3177                 : 
                               3178               0 :     if (len2 <= 0)
                               3179               0 :         PG_RETURN_INT32(1);     /* result for empty pattern */
                               3180                 : 
                               3181               0 :     p1 = VARDATA_ANY(t1);
                               3182               0 :     p2 = VARDATA_ANY(t2);
                               3183                 : 
 7877 bruce                    3184               0 :     pos = 0;
                               3185               0 :     px = (len1 - len2);
                               3186               0 :     for (p = 0; p <= px; p++)
                               3187                 :     {
                               3188               0 :         if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
                               3189                 :         {
                               3190               0 :             pos = p + 1;
                               3191               0 :             break;
                               3192                 :         };
                               3193               0 :         p1++;
                               3194                 :     };
                               3195                 : 
                               3196               0 :     PG_RETURN_INT32(pos);
                               3197                 : }
                               3198                 : 
 9770 scrappy                  3199 ECB             : /*-------------------------------------------------------------
                               3200                 :  * byteaGetByte
                               3201                 :  *
                               3202                 :  * this routine treats "bytea" as an array of bytes.
 8335 tgl                      3203                 :  * It returns the Nth byte (a number between 0 and 255).
                               3204                 :  *-------------------------------------------------------------
 9770 scrappy                  3205                 :  */
                               3206                 : Datum
 8335 tgl                      3207 CBC          30 : byteaGetByte(PG_FUNCTION_ARGS)
 9770 scrappy                  3208 EUB             : {
 5847 tgl                      3209 GIC          30 :     bytea      *v = PG_GETARG_BYTEA_PP(0);
 8335 tgl                      3210 CBC          30 :     int32       n = PG_GETARG_INT32(1);
 9344 bruce                    3211 ECB             :     int         len;
                               3212                 :     int         byte;
                               3213                 : 
 5847 tgl                      3214 GIC          30 :     len = VARSIZE_ANY_EXHDR(v);
                               3215                 : 
 8416                          3216              30 :     if (n < 0 || n >= len)
 7196                          3217               3 :         ereport(ERROR,
                               3218                 :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
 7196 tgl                      3219 ECB             :                  errmsg("index %d out of valid range, 0..%d",
                               3220                 :                         n, len - 1)));
                               3221                 : 
 5847 tgl                      3222 CBC          27 :     byte = ((unsigned char *) VARDATA_ANY(v))[n];
                               3223                 : 
 8335 tgl                      3224 GIC          27 :     PG_RETURN_INT32(byte);
 9770 scrappy                  3225 ECB             : }
                               3226                 : 
 9770 scrappy                  3227 EUB             : /*-------------------------------------------------------------
 9770 scrappy                  3228 ECB             :  * byteaGetBit
                               3229                 :  *
                               3230                 :  * This routine treats a "bytea" type like an array of bits.
                               3231                 :  * It returns the value of the Nth bit (0 or 1).
                               3232                 :  *
                               3233                 :  *-------------------------------------------------------------
                               3234                 :  */
 8335 tgl                      3235                 : Datum
 8335 tgl                      3236 GIC           6 : byteaGetBit(PG_FUNCTION_ARGS)
                               3237                 : {
 5847                          3238               6 :     bytea      *v = PG_GETARG_BYTEA_PP(0);
 1097                          3239               6 :     int64       n = PG_GETARG_INT64(1);
                               3240                 :     int         byteNo,
                               3241                 :                 bitNo;
                               3242                 :     int         len;
 4382 bruce                    3243 ECB             :     int         byte;
 9345                          3244                 : 
 5847 tgl                      3245 CBC           6 :     len = VARSIZE_ANY_EXHDR(v);
 8416 tgl                      3246 ECB             : 
 1097 tgl                      3247 CBC           6 :     if (n < 0 || n >= (int64) len * 8)
 7196                          3248               3 :         ereport(ERROR,
 7196 tgl                      3249 EUB             :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
                               3250                 :                  errmsg("index %lld out of valid range, 0..%lld",
                               3251                 :                         (long long) n, (long long) len * 8 - 1)));
                               3252                 : 
                               3253                 :     /* n/8 is now known < len, so safe to cast to int */
 1097 tgl                      3254 GIC           3 :     byteNo = (int) (n / 8);
                               3255               3 :     bitNo = (int) (n % 8);
                               3256                 : 
 5847                          3257               3 :     byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
                               3258                 : 
 4382 bruce                    3259               3 :     if (byte & (1 << bitNo))
 8335 tgl                      3260 CBC           3 :         PG_RETURN_INT32(1);
 9345 bruce                    3261 ECB             :     else
 8335 tgl                      3262 LBC           0 :         PG_RETURN_INT32(0);
 9770 scrappy                  3263 ECB             : }
 9345 bruce                    3264                 : 
                               3265                 : /*-------------------------------------------------------------
                               3266                 :  * byteaSetByte
 9770 scrappy                  3267                 :  *
                               3268                 :  * Given an instance of type 'bytea' creates a new one with
                               3269                 :  * the Nth byte set to the given value.
                               3270                 :  *
                               3271                 :  *-------------------------------------------------------------
                               3272                 :  */
 8335 tgl                      3273                 : Datum
 8335 tgl                      3274 CBC           6 : byteaSetByte(PG_FUNCTION_ARGS)
                               3275                 : {
 2219 noah                     3276 GIC           6 :     bytea      *res = PG_GETARG_BYTEA_P_COPY(0);
 8335 tgl                      3277 CBC           6 :     int32       n = PG_GETARG_INT32(1);
                               3278               6 :     int32       newByte = PG_GETARG_INT32(2);
                               3279                 :     int         len;
 9345 bruce                    3280 ECB             : 
 2219 noah                     3281 GIC           6 :     len = VARSIZE(res) - VARHDRSZ;
                               3282                 : 
 8416 tgl                      3283 CBC           6 :     if (n < 0 || n >= len)
 7196 tgl                      3284 GIC           3 :         ereport(ERROR,
                               3285                 :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
 7196 tgl                      3286 ECB             :                  errmsg("index %d out of valid range, 0..%d",
                               3287                 :                         n, len - 1)));
                               3288                 : 
                               3289                 :     /*
                               3290                 :      * Now set the byte.
 9345 bruce                    3291                 :      */
 8416 tgl                      3292 GIC           3 :     ((unsigned char *) VARDATA(res))[n] = newByte;
                               3293                 : 
 8335 tgl                      3294 CBC           3 :     PG_RETURN_BYTEA_P(res);
                               3295                 : }
 9770 scrappy                  3296 ECB             : 
                               3297                 : /*-------------------------------------------------------------
                               3298                 :  * byteaSetBit
                               3299                 :  *
                               3300                 :  * Given an instance of type 'bytea' creates a new one with
                               3301                 :  * the Nth bit set to the given value.
                               3302                 :  *
                               3303                 :  *-------------------------------------------------------------
                               3304                 :  */
                               3305                 : Datum
 8335 tgl                      3306 GIC           6 : byteaSetBit(PG_FUNCTION_ARGS)
                               3307                 : {
 2219 noah                     3308               6 :     bytea      *res = PG_GETARG_BYTEA_P_COPY(0);
 1097 tgl                      3309               6 :     int64       n = PG_GETARG_INT64(1);
 8335                          3310               6 :     int32       newBit = PG_GETARG_INT32(2);
                               3311                 :     int         len;
                               3312                 :     int         oldByte,
                               3313                 :                 newByte;
                               3314                 :     int         byteNo,
                               3315                 :                 bitNo;
                               3316                 : 
 2219 noah                     3317               6 :     len = VARSIZE(res) - VARHDRSZ;
                               3318                 : 
 1097 tgl                      3319               6 :     if (n < 0 || n >= (int64) len * 8)
 7196                          3320               3 :         ereport(ERROR,
                               3321                 :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
                               3322                 :                  errmsg("index %lld out of valid range, 0..%lld",
                               3323                 :                         (long long) n, (long long) len * 8 - 1)));
                               3324                 : 
                               3325                 :     /* n/8 is now known < len, so safe to cast to int */
 1097 tgl                      3326 CBC           3 :     byteNo = (int) (n / 8);
 1097 tgl                      3327 GIC           3 :     bitNo = (int) (n % 8);
                               3328                 : 
 9345 bruce                    3329 ECB             :     /*
                               3330                 :      * sanity check!
                               3331                 :      */
 9345 bruce                    3332 CBC           3 :     if (newBit != 0 && newBit != 1)
 7196 tgl                      3333 UIC           0 :         ereport(ERROR,
 7196 tgl                      3334 ECB             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 7196 tgl                      3335 EUB             :                  errmsg("new bit must be 0 or 1")));
                               3336                 : 
 9345 bruce                    3337 ECB             :     /*
 8335 tgl                      3338                 :      * Update the byte.
                               3339                 :      */
 8335 tgl                      3340 GIC           3 :     oldByte = ((unsigned char *) VARDATA(res))[byteNo];
                               3341                 : 
 9345 bruce                    3342               3 :     if (newBit == 0)
                               3343               3 :         newByte = oldByte & (~(1 << bitNo));
                               3344                 :     else
 9345 bruce                    3345 UIC           0 :         newByte = oldByte | (1 << bitNo);
 9345 bruce                    3346 ECB             : 
 8335 tgl                      3347 GIC           3 :     ((unsigned char *) VARDATA(res))[byteNo] = newByte;
                               3348                 : 
 8335 tgl                      3349 GBC           3 :     PG_RETURN_BYTEA_P(res);
                               3350                 : }
                               3351                 : 
 9081 lockhart                 3352 EUB             : 
                               3353                 : /* text_name()
 8312 tgl                      3354                 :  * Converts a text type to a Name type.
 9081 lockhart                 3355                 :  */
 8312 tgl                      3356                 : Datum
 8312 tgl                      3357 GIC       15263 : text_name(PG_FUNCTION_ARGS)
 9081 lockhart                 3358 EUB             : {
 5847 tgl                      3359 GBC       15263 :     text       *s = PG_GETARG_TEXT_PP(0);
                               3360                 :     Name        result;
                               3361                 :     int         len;
 9081 lockhart                 3362 EUB             : 
 5847 tgl                      3363 GIC       15263 :     len = VARSIZE_ANY_EXHDR(s);
                               3364                 : 
                               3365                 :     /* Truncate oversize input */
 8312                          3366           15263 :     if (len >= NAMEDATALEN)
 3971 tgl                      3367 CBC           3 :         len = pg_mbcliplen(VARDATA_ANY(s), len, NAMEDATALEN - 1);
 9081 lockhart                 3368 ECB             : 
                               3369                 :     /* We use palloc0 here to ensure result is zero-padded */
 3971 tgl                      3370 GIC       15263 :     result = (Name) palloc0(NAMEDATALEN);
 5847 tgl                      3371 CBC       15263 :     memcpy(NameStr(*result), VARDATA_ANY(s), len);
 9081 lockhart                 3372 ECB             : 
 8312 tgl                      3373 CBC       15263 :     PG_RETURN_NAME(result);
                               3374                 : }
 9081 lockhart                 3375 ECB             : 
 9081 lockhart                 3376 EUB             : /* name_text()
                               3377                 :  * Converts a Name type to a text type.
                               3378                 :  */
 8312 tgl                      3379 ECB             : Datum
 8312 tgl                      3380 GBC      525697 : name_text(PG_FUNCTION_ARGS)
                               3381                 : {
 8312 tgl                      3382 CBC      525697 :     Name        s = PG_GETARG_NAME(0);
                               3383                 : 
 5493 tgl                      3384 GBC      525697 :     PG_RETURN_TEXT_P(cstring_to_text(NameStr(*s)));
 8312 tgl                      3385 EUB             : }
 7909                          3386                 : 
                               3387                 : 
                               3388                 : /*
 7680 tgl                      3389 ECB             :  * textToQualifiedNameList - convert a text object to list of names
                               3390                 :  *
                               3391                 :  * This implements the input parsing needed by nextval() and other
 7680 tgl                      3392 EUB             :  * functions that take a text parameter representing a qualified name.
                               3393                 :  * We split the name at dots, downcase if not double-quoted, and
                               3394                 :  * truncate names if they're too long.
 7680 tgl                      3395 ECB             :  */
                               3396                 : List *
 6526 neilc                    3397 GIC         685 : textToQualifiedNameList(text *textval)
 7680 tgl                      3398 ECB             : {
 7680 tgl                      3399 EUB             :     char       *rawname;
 7680 tgl                      3400 GIC         685 :     List       *result = NIL;
                               3401                 :     List       *namelist;
                               3402                 :     ListCell   *l;
                               3403                 : 
 7680 tgl                      3404 ECB             :     /* Convert to C string (handles possible detoasting). */
                               3405                 :     /* Note we rely on being able to modify rawname below. */
 5493 tgl                      3406 CBC         685 :     rawname = text_to_cstring(textval);
                               3407                 : 
 7678 tgl                      3408 GIC         685 :     if (!SplitIdentifierString(rawname, '.', &namelist))
 7196 tgl                      3409 LBC           0 :         ereport(ERROR,
                               3410                 :                 (errcode(ERRCODE_INVALID_NAME),
 7196 tgl                      3411 ECB             :                  errmsg("invalid name syntax")));
                               3412                 : 
 7678 tgl                      3413 GIC         685 :     if (namelist == NIL)
 7196 tgl                      3414 UIC           0 :         ereport(ERROR,
                               3415                 :                 (errcode(ERRCODE_INVALID_NAME),
                               3416                 :                  errmsg("invalid name syntax")));
                               3417                 : 
 7678 tgl                      3418 GIC        1425 :     foreach(l, namelist)
                               3419                 :     {
 7522 bruce                    3420             740 :         char       *curname = (char *) lfirst(l);
                               3421                 : 
 7678 tgl                      3422             740 :         result = lappend(result, makeString(pstrdup(curname)));
                               3423                 :     }
                               3424                 : 
                               3425             685 :     pfree(rawname);
 6888 neilc                    3426             685 :     list_free(namelist);
                               3427                 : 
 7678 tgl                      3428             685 :     return result;
                               3429                 : }
                               3430                 : 
                               3431                 : /*
                               3432                 :  * SplitIdentifierString --- parse a string containing identifiers
                               3433                 :  *
                               3434                 :  * This is the guts of textToQualifiedNameList, and is exported for use in
                               3435                 :  * other situations such as parsing GUC variables.  In the GUC case, it's
                               3436                 :  * important to avoid memory leaks, so the API is designed to minimize the
                               3437                 :  * amount of stuff that needs to be allocated and freed.
                               3438                 :  *
                               3439                 :  * Inputs:
                               3440                 :  *  rawstring: the input string; must be overwritable!  On return, it's
                               3441                 :  *             been modified to contain the separated identifiers.
                               3442                 :  *  separator: the separator punctuation expected between identifiers
                               3443                 :  *             (typically '.' or ',').  Whitespace may also appear around
                               3444                 :  *             identifiers.
                               3445                 :  * Outputs:
                               3446                 :  *  namelist: filled with a palloc'd list of pointers to identifiers within
 6414 neilc                    3447 ECB             :  *            rawstring.  Caller should list_free() this even on error return.
                               3448                 :  *
                               3449                 :  * Returns true if okay, false if there is a syntax error in the string.
 7678 tgl                      3450                 :  *
                               3451                 :  * Note that an empty string is considered okay here, though not in
                               3452                 :  * textToQualifiedNameList.
                               3453                 :  */
                               3454                 : bool
 7678 tgl                      3455 CBC       72651 : SplitIdentifierString(char *rawstring, char separator,
 7678 tgl                      3456 EUB             :                       List **namelist)
                               3457                 : {
 7678 tgl                      3458 CBC       72651 :     char       *nextp = rawstring;
                               3459           72651 :     bool        done = false;
                               3460                 : 
 7678 tgl                      3461 GIC       72651 :     *namelist = NIL;
                               3462                 : 
 2146                          3463           72651 :     while (scanner_isspace(*nextp))
 7678 tgl                      3464 UIC           0 :         nextp++;                /* skip leading whitespace */
                               3465                 : 
 7678 tgl                      3466 GIC       72651 :     if (*nextp == '\0')
 7678 tgl                      3467 CBC        9918 :         return true;            /* allow empty string */
                               3468                 : 
                               3469                 :     /* At the top of the loop, we are at start of a new identifier. */
 7680 tgl                      3470 ECB             :     do
                               3471                 :     {
                               3472                 :         char       *curname;
                               3473                 :         char       *endp;
                               3474                 : 
 2665 peter_e                  3475 GBC       99650 :         if (*nextp == '"')
 7680 tgl                      3476 ECB             :         {
                               3477                 :             /* Quoted name --- collapse quote-quote pairs, no downcasing */
 7680 tgl                      3478 GIC       15347 :             curname = nextp + 1;
 7680 tgl                      3479 ECB             :             for (;;)
                               3480                 :             {
 2665 peter_e                  3481 GIC       15349 :                 endp = strchr(nextp + 1, '"');
 7680 tgl                      3482           15348 :                 if (endp == NULL)
 2118 tgl                      3483 LBC           0 :                     return false;   /* mismatched quotes */
 2665 peter_e                  3484 GIC       15348 :                 if (endp[1] != '"')
 7680 tgl                      3485           15347 :                     break;      /* found end of quoted name */
                               3486                 :                 /* Collapse adjacent quotes into one quote, and look again */
 7522 bruce                    3487               1 :                 memmove(endp, endp + 1, strlen(endp));
 7680 tgl                      3488 CBC           1 :                 nextp = endp;
 7680 tgl                      3489 ECB             :             }
 7678                          3490                 :             /* endp now points at the terminating quote */
 7680 tgl                      3491 CBC       15347 :             nextp = endp + 1;
 7680 tgl                      3492 ECB             :         }
                               3493                 :         else
 7680 tgl                      3494 EUB             :         {
                               3495                 :             /* Unquoted name --- extends to separator or whitespace */
                               3496                 :             char       *downname;
 6987 tgl                      3497 ECB             :             int         len;
 6987 tgl                      3498 EUB             : 
 7680 tgl                      3499 GIC       84303 :             curname = nextp;
 7678 tgl                      3500 CBC      727631 :             while (*nextp && *nextp != separator &&
 2146 tgl                      3501 GIC      643329 :                    !scanner_isspace(*nextp))
 7678 tgl                      3502 CBC      643328 :                 nextp++;
                               3503           84303 :             endp = nextp;
                               3504           84303 :             if (curname == nextp)
 7678 tgl                      3505 UIC           0 :                 return false;   /* empty unquoted name not allowed */
                               3506                 : 
 6987 tgl                      3507 ECB             :             /*
 6385 bruce                    3508                 :              * Downcase the identifier, using same code as main lexer does.
                               3509                 :              *
 6987 tgl                      3510 EUB             :              * XXX because we want to overwrite the input in-place, we cannot
                               3511                 :              * support a downcasing transformation that increases the string
                               3512                 :              * length.  This is not a problem given the current implementation
 6385 bruce                    3513 ECB             :              * of downcase_truncate_identifier, but we'll probably have to do
                               3514                 :              * something about this someday.
                               3515                 :              */
 6987 tgl                      3516 GIC       84303 :             len = endp - curname;
                               3517           84303 :             downname = downcase_truncate_identifier(curname, len, false);
 6987 tgl                      3518 CBC       84303 :             Assert(strlen(downname) <= len);
 2997 tgl                      3519 GIC       84303 :             strncpy(curname, downname, len);    /* strncpy is required here */
 6987                          3520           84303 :             pfree(downname);
 7680 tgl                      3521 ECB             :         }
                               3522                 : 
 2146 tgl                      3523 CBC       99651 :         while (scanner_isspace(*nextp))
 7678 tgl                      3524 GIC           1 :             nextp++;            /* skip trailing whitespace */
                               3525                 : 
                               3526           99650 :         if (*nextp == separator)
                               3527                 :         {
                               3528           36917 :             nextp++;
 2146                          3529           59216 :             while (scanner_isspace(*nextp))
 7678                          3530           22299 :                 nextp++;        /* skip leading whitespace for next */
                               3531                 :             /* we expect another name, so done remains false */
                               3532                 :         }
                               3533           62733 :         else if (*nextp == '\0')
                               3534           62732 :             done = true;
                               3535                 :         else
 7678 tgl                      3536 CBC           1 :             return false;       /* invalid syntax */
                               3537                 : 
 7678 tgl                      3538 ECB             :         /* Now safe to overwrite separator with a null */
 7678 tgl                      3539 CBC       99649 :         *endp = '\0';
                               3540                 : 
                               3541                 :         /* Truncate name if it's overlength */
 6987 tgl                      3542 GIC       99649 :         truncate_identifier(curname, strlen(curname), false);
                               3543                 : 
                               3544                 :         /*
                               3545                 :          * Finished isolating current name --- add it to list
                               3546                 :          */
 7678                          3547           99649 :         *namelist = lappend(*namelist, curname);
 7680 tgl                      3548 ECB             : 
 7678                          3549                 :         /* Loop back if we didn't reach end of string */
 7678 tgl                      3550 CBC       99649 :     } while (!done);
 7680 tgl                      3551 ECB             : 
 7678 tgl                      3552 GIC       62732 :     return true;
                               3553                 : }
 7680 tgl                      3554 ECB             : 
                               3555                 : 
                               3556                 : /*
 2119                          3557                 :  * SplitDirectoriesString --- parse a string containing file/directory names
                               3558                 :  *
                               3559                 :  * This works fine on file names too; the function name is historical.
 3894                          3560                 :  *
                               3561                 :  * This is similar to SplitIdentifierString, except that the parsing
                               3562                 :  * rules are meant to handle pathnames instead of identifiers: there is
                               3563                 :  * no downcasing, embedded spaces are allowed, the max length is MAXPGPATH-1,
 3867                          3564                 :  * and we apply canonicalize_path() to each extracted string.  Because of the
                               3565                 :  * last, the returned strings are separately palloc'd rather than being
                               3566                 :  * pointers into rawstring --- but we still scribble on rawstring.
                               3567                 :  *
 3894                          3568                 :  * Inputs:
                               3569                 :  *  rawstring: the input string; must be modifiable!
                               3570                 :  *  separator: the separator punctuation expected between directories
 3260 bruce                    3571                 :  *             (typically ',' or ';').  Whitespace may also appear around
                               3572                 :  *             directories.
                               3573                 :  * Outputs:
                               3574                 :  *  namelist: filled with a palloc'd list of directory names.
                               3575                 :  *            Caller should list_free_deep() this even on error return.
                               3576                 :  *
                               3577                 :  * Returns true if okay, false if there is a syntax error in the string.
                               3578                 :  *
                               3579                 :  * Note that an empty string is considered okay here.
 3894 tgl                      3580                 :  */
                               3581                 : bool
 3894 tgl                      3582 CBC         621 : SplitDirectoriesString(char *rawstring, char separator,
 3894 tgl                      3583 EUB             :                        List **namelist)
                               3584                 : {
 3894 tgl                      3585 GIC         621 :     char       *nextp = rawstring;
 3894 tgl                      3586 CBC         621 :     bool        done = false;
 3894 tgl                      3587 ECB             : 
 3894 tgl                      3588 GIC         621 :     *namelist = NIL;
 3894 tgl                      3589 ECB             : 
 2146 tgl                      3590 GIC         621 :     while (scanner_isspace(*nextp))
 3894 tgl                      3591 UIC           0 :         nextp++;                /* skip leading whitespace */
 3894 tgl                      3592 ECB             : 
 3894 tgl                      3593 CBC         621 :     if (*nextp == '\0')
 3894 tgl                      3594 GIC           3 :         return true;            /* allow empty string */
                               3595                 : 
 3894 tgl                      3596 ECB             :     /* At the top of the loop, we are at start of a new directory. */
                               3597                 :     do
                               3598                 :     {
                               3599                 :         char       *curname;
                               3600                 :         char       *endp;
                               3601                 : 
 2665 peter_e                  3602 CBC         618 :         if (*nextp == '"')
 3894 tgl                      3603 ECB             :         {
                               3604                 :             /* Quoted name --- collapse quote-quote pairs */
 3894 tgl                      3605 UIC           0 :             curname = nextp + 1;
                               3606                 :             for (;;)
                               3607                 :             {
 2665 peter_e                  3608 LBC           0 :                 endp = strchr(nextp + 1, '"');
 3894 tgl                      3609               0 :                 if (endp == NULL)
 2118 tgl                      3610 UIC           0 :                     return false;   /* mismatched quotes */
 2665 peter_e                  3611 LBC           0 :                 if (endp[1] != '"')
 3894 tgl                      3612 UIC           0 :                     break;      /* found end of quoted name */
 3894 tgl                      3613 ECB             :                 /* Collapse adjacent quotes into one quote, and look again */
 3894 tgl                      3614 LBC           0 :                 memmove(endp, endp + 1, strlen(endp));
 3894 tgl                      3615 UIC           0 :                 nextp = endp;
 3894 tgl                      3616 ECB             :             }
                               3617                 :             /* endp now points at the terminating quote */
 3894 tgl                      3618 UIC           0 :             nextp = endp + 1;
                               3619                 :         }
 3894 tgl                      3620 ECB             :         else
                               3621                 :         {
 3867                          3622                 :             /* Unquoted name --- extends to separator or end of string */
 3867 tgl                      3623 CBC         618 :             curname = endp = nextp;
 3867 tgl                      3624 GIC       10456 :             while (*nextp && *nextp != separator)
                               3625                 :             {
                               3626                 :                 /* trailing whitespace should not be included in name */
 2146                          3627            9838 :                 if (!scanner_isspace(*nextp))
 3867 tgl                      3628 CBC        9838 :                     endp = nextp + 1;
 3894                          3629            9838 :                 nextp++;
                               3630                 :             }
 3867                          3631             618 :             if (curname == endp)
 3894 tgl                      3632 UIC           0 :                 return false;   /* empty unquoted name not allowed */
 3894 tgl                      3633 ECB             :         }
                               3634                 : 
 2146 tgl                      3635 GIC         618 :         while (scanner_isspace(*nextp))
 3894 tgl                      3636 LBC           0 :             nextp++;            /* skip trailing whitespace */
                               3637                 : 
 3894 tgl                      3638 GIC         618 :         if (*nextp == separator)
                               3639                 :         {
 3894 tgl                      3640 LBC           0 :             nextp++;
 2146 tgl                      3641 UIC           0 :             while (scanner_isspace(*nextp))
 3894 tgl                      3642 LBC           0 :                 nextp++;        /* skip leading whitespace for next */
 3894 tgl                      3643 ECB             :             /* we expect another name, so done remains false */
                               3644                 :         }
 3894 tgl                      3645 GIC         618 :         else if (*nextp == '\0')
                               3646             618 :             done = true;
                               3647                 :         else
 3894 tgl                      3648 LBC           0 :             return false;       /* invalid syntax */
 3894 tgl                      3649 ECB             : 
                               3650                 :         /* Now safe to overwrite separator with a null */
 3894 tgl                      3651 CBC         618 :         *endp = '\0';
                               3652                 : 
 3894 tgl                      3653 ECB             :         /* Truncate path if it's overlength */
 3894 tgl                      3654 CBC         618 :         if (strlen(curname) >= MAXPGPATH)
 3894 tgl                      3655 UIC           0 :             curname[MAXPGPATH - 1] = '\0';
 3894 tgl                      3656 ECB             : 
                               3657                 :         /*
                               3658                 :          * Finished isolating current name --- add it to list
                               3659                 :          */
 3894 tgl                      3660 CBC         618 :         curname = pstrdup(curname);
 3894 tgl                      3661 GIC         618 :         canonicalize_path(curname);
 3894 tgl                      3662 CBC         618 :         *namelist = lappend(*namelist, curname);
 3894 tgl                      3663 ECB             : 
                               3664                 :         /* Loop back if we didn't reach end of string */
 3894 tgl                      3665 GIC         618 :     } while (!done);
                               3666                 : 
                               3667             618 :     return true;
 3894 tgl                      3668 ECB             : }
                               3669                 : 
                               3670                 : 
 1713                          3671                 : /*
                               3672                 :  * SplitGUCList --- parse a string containing identifiers or file names
                               3673                 :  *
                               3674                 :  * This is used to split the value of a GUC_LIST_QUOTE GUC variable, without
                               3675                 :  * presuming whether the elements will be taken as identifiers or file names.
                               3676                 :  * We assume the input has already been through flatten_set_variable_args(),
                               3677                 :  * so that we need never downcase (if appropriate, that was done already).
                               3678                 :  * Nor do we ever truncate, since we don't know the correct max length.
                               3679                 :  * We disallow embedded whitespace for simplicity (it shouldn't matter,
                               3680                 :  * because any embedded whitespace should have led to double-quoting).
                               3681                 :  * Otherwise the API is identical to SplitIdentifierString.
                               3682                 :  *
                               3683                 :  * XXX it's annoying to have so many copies of this string-splitting logic.
                               3684                 :  * However, it's not clear that having one function with a bunch of option
                               3685                 :  * flags would be much better.
                               3686                 :  *
                               3687                 :  * XXX there is a version of this function in src/bin/pg_dump/dumputils.c.
                               3688                 :  * Be sure to update that if you have to change this.
                               3689                 :  *
                               3690                 :  * Inputs:
                               3691                 :  *  rawstring: the input string; must be overwritable!  On return, it's
                               3692                 :  *             been modified to contain the separated identifiers.
                               3693                 :  *  separator: the separator punctuation expected between identifiers
                               3694                 :  *             (typically '.' or ',').  Whitespace may also appear around
                               3695                 :  *             identifiers.
                               3696                 :  * Outputs:
                               3697                 :  *  namelist: filled with a palloc'd list of pointers to identifiers within
                               3698                 :  *            rawstring.  Caller should list_free() this even on error return.
                               3699                 :  *
                               3700                 :  * Returns true if okay, false if there is a syntax error in the string.
                               3701                 :  */
                               3702                 : bool
 1713 tgl                      3703 GIC        2458 : SplitGUCList(char *rawstring, char separator,
 1713 tgl                      3704 ECB             :              List **namelist)
                               3705                 : {
 1713 tgl                      3706 GIC        2458 :     char       *nextp = rawstring;
 1713 tgl                      3707 CBC        2458 :     bool        done = false;
                               3708                 : 
 1713 tgl                      3709 GIC        2458 :     *namelist = NIL;
 1713 tgl                      3710 ECB             : 
 1713 tgl                      3711 GIC        2458 :     while (scanner_isspace(*nextp))
 1713 tgl                      3712 LBC           0 :         nextp++;                /* skip leading whitespace */
                               3713                 : 
 1713 tgl                      3714 CBC        2458 :     if (*nextp == '\0')
 1713 tgl                      3715 GIC        2416 :         return true;            /* allow empty string */
                               3716                 : 
                               3717                 :     /* At the top of the loop, we are at start of a new identifier. */
                               3718                 :     do
                               3719                 :     {
                               3720                 :         char       *curname;
                               3721                 :         char       *endp;
                               3722                 : 
                               3723              55 :         if (*nextp == '"')
 1713 tgl                      3724 ECB             :         {
                               3725                 :             /* Quoted name --- collapse quote-quote pairs */
 1713 tgl                      3726 CBC          12 :             curname = nextp + 1;
 1713 tgl                      3727 ECB             :             for (;;)
                               3728                 :             {
 1713 tgl                      3729 GIC          18 :                 endp = strchr(nextp + 1, '"');
                               3730              15 :                 if (endp == NULL)
 1713 tgl                      3731 UIC           0 :                     return false;   /* mismatched quotes */
 1713 tgl                      3732 GIC          15 :                 if (endp[1] != '"')
                               3733              12 :                     break;      /* found end of quoted name */
                               3734                 :                 /* Collapse adjacent quotes into one quote, and look again */
                               3735               3 :                 memmove(endp, endp + 1, strlen(endp));
                               3736               3 :                 nextp = endp;
                               3737                 :             }
 1713 tgl                      3738 ECB             :             /* endp now points at the terminating quote */
 1713 tgl                      3739 GIC          12 :             nextp = endp + 1;
 1713 tgl                      3740 ECB             :         }
                               3741                 :         else
                               3742                 :         {
                               3743                 :             /* Unquoted name --- extends to separator or whitespace */
 1713 tgl                      3744 GIC          43 :             curname = nextp;
                               3745             409 :             while (*nextp && *nextp != separator &&
                               3746             366 :                    !scanner_isspace(*nextp))
                               3747             366 :                 nextp++;
                               3748              43 :             endp = nextp;
                               3749              43 :             if (curname == nextp)
 1713 tgl                      3750 UIC           0 :                 return false;   /* empty unquoted name not allowed */
                               3751                 :         }
                               3752                 : 
 1713 tgl                      3753 CBC          55 :         while (scanner_isspace(*nextp))
 1713 tgl                      3754 LBC           0 :             nextp++;            /* skip trailing whitespace */
                               3755                 : 
 1713 tgl                      3756 GIC          55 :         if (*nextp == separator)
 1713 tgl                      3757 ECB             :         {
 1713 tgl                      3758 GIC          13 :             nextp++;
 1713 tgl                      3759 GBC          22 :             while (scanner_isspace(*nextp))
 1713 tgl                      3760 GIC           9 :                 nextp++;        /* skip leading whitespace for next */
                               3761                 :             /* we expect another name, so done remains false */
 1713 tgl                      3762 ECB             :         }
 1713 tgl                      3763 GIC          42 :         else if (*nextp == '\0')
 1713 tgl                      3764 CBC          42 :             done = true;
                               3765                 :         else
 1713 tgl                      3766 UIC           0 :             return false;       /* invalid syntax */
 1713 tgl                      3767 ECB             : 
                               3768                 :         /* Now safe to overwrite separator with a null */
 1713 tgl                      3769 CBC          55 :         *endp = '\0';
 1713 tgl                      3770 ECB             : 
                               3771                 :         /*
                               3772                 :          * Finished isolating current name --- add it to list
                               3773                 :          */
 1713 tgl                      3774 GIC          55 :         *namelist = lappend(*namelist, curname);
 1713 tgl                      3775 ECB             : 
                               3776                 :         /* Loop back if we didn't reach end of string */
 1713 tgl                      3777 GIC          55 :     } while (!done);
                               3778                 : 
 1713 tgl                      3779 CBC          42 :     return true;
                               3780                 : }
                               3781                 : 
 1713 tgl                      3782 ECB             : 
 7909                          3783                 : /*****************************************************************************
                               3784                 :  *  Comparison Functions used for bytea
                               3785                 :  *
                               3786                 :  * Note: btree indexes need these routines not to leak memory; therefore,
                               3787                 :  * be careful to free working copies of toasted datums.  Most places don't
                               3788                 :  * need to be so careful.
                               3789                 :  *****************************************************************************/
                               3790                 : 
                               3791                 : Datum
 7909 tgl                      3792 GIC        5188 : byteaeq(PG_FUNCTION_ARGS)
 7909 tgl                      3793 ECB             : {
 4464 tgl                      3794 GIC        5188 :     Datum       arg1 = PG_GETARG_DATUM(0);
                               3795            5188 :     Datum       arg2 = PG_GETARG_DATUM(1);
 7909 tgl                      3796 ECB             :     bool        result;
 4464                          3797                 :     Size        len1,
                               3798                 :                 len2;
 7909                          3799                 : 
                               3800                 :     /*
 4464                          3801                 :      * We can use a fast path for unequal lengths, which might save us from
                               3802                 :      * having to detoast one or both values.
                               3803                 :      */
 4464 tgl                      3804 CBC        5188 :     len1 = toast_raw_datum_size(arg1);
 4464 tgl                      3805 GIC        5188 :     len2 = toast_raw_datum_size(arg2);
 7909                          3806            5188 :     if (len1 != len2)
                               3807            2154 :         result = false;
                               3808                 :     else
                               3809                 :     {
 4464                          3810            3034 :         bytea      *barg1 = DatumGetByteaPP(arg1);
                               3811            3034 :         bytea      *barg2 = DatumGetByteaPP(arg2);
                               3812                 : 
                               3813            3034 :         result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
                               3814                 :                          len1 - VARHDRSZ) == 0);
 4464 tgl                      3815 ECB             : 
 4464 tgl                      3816 GIC        3034 :         PG_FREE_IF_COPY(barg1, 0);
 4464 tgl                      3817 CBC        3034 :         PG_FREE_IF_COPY(barg2, 1);
 4464 tgl                      3818 ECB             :     }
 7909                          3819                 : 
 7909 tgl                      3820 GIC        5188 :     PG_RETURN_BOOL(result);
 7909 tgl                      3821 ECB             : }
                               3822                 : 
                               3823                 : Datum
 7909 tgl                      3824 CBC         384 : byteane(PG_FUNCTION_ARGS)
 7909 tgl                      3825 ECB             : {
 4464 tgl                      3826 CBC         384 :     Datum       arg1 = PG_GETARG_DATUM(0);
                               3827             384 :     Datum       arg2 = PG_GETARG_DATUM(1);
                               3828                 :     bool        result;
 4464 tgl                      3829 ECB             :     Size        len1,
                               3830                 :                 len2;
 7909                          3831                 : 
 4464                          3832                 :     /*
                               3833                 :      * We can use a fast path for unequal lengths, which might save us from
                               3834                 :      * having to detoast one or both values.
                               3835                 :      */
 4464 tgl                      3836 GIC         384 :     len1 = toast_raw_datum_size(arg1);
 4464 tgl                      3837 CBC         384 :     len2 = toast_raw_datum_size(arg2);
 7909 tgl                      3838 GIC         384 :     if (len1 != len2)
 7909 tgl                      3839 UIC           0 :         result = true;
                               3840                 :     else
                               3841                 :     {
 4464 tgl                      3842 GIC         384 :         bytea      *barg1 = DatumGetByteaPP(arg1);
                               3843             384 :         bytea      *barg2 = DatumGetByteaPP(arg2);
                               3844                 : 
                               3845             384 :         result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
                               3846                 :                          len1 - VARHDRSZ) != 0);
                               3847                 : 
 4464 tgl                      3848 CBC         384 :         PG_FREE_IF_COPY(barg1, 0);
 4464 tgl                      3849 GIC         384 :         PG_FREE_IF_COPY(barg2, 1);
                               3850                 :     }
                               3851                 : 
 7909 tgl                      3852 CBC         384 :     PG_RETURN_BOOL(result);
 7909 tgl                      3853 ECB             : }
                               3854                 : 
                               3855                 : Datum
 7909 tgl                      3856 GIC        4158 : bytealt(PG_FUNCTION_ARGS)
 7909 tgl                      3857 ECB             : {
 5847 tgl                      3858 GIC        4158 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
                               3859            4158 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
                               3860                 :     int         len1,
                               3861                 :                 len2;
 7909 tgl                      3862 ECB             :     int         cmp;
                               3863                 : 
 5847 tgl                      3864 CBC        4158 :     len1 = VARSIZE_ANY_EXHDR(arg1);
 5847 tgl                      3865 GIC        4158 :     len2 = VARSIZE_ANY_EXHDR(arg2);
                               3866                 : 
 5847 tgl                      3867 CBC        4158 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
 7909 tgl                      3868 ECB             : 
 7909 tgl                      3869 GIC        4158 :     PG_FREE_IF_COPY(arg1, 0);
                               3870            4158 :     PG_FREE_IF_COPY(arg2, 1);
 7909 tgl                      3871 ECB             : 
 7909 tgl                      3872 CBC        4158 :     PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
 7909 tgl                      3873 ECB             : }
                               3874                 : 
                               3875                 : Datum
 7909 tgl                      3876 GIC        3178 : byteale(PG_FUNCTION_ARGS)
                               3877                 : {
 5847 tgl                      3878 CBC        3178 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
                               3879            3178 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
                               3880                 :     int         len1,
                               3881                 :                 len2;
 7909 tgl                      3882 ECB             :     int         cmp;
                               3883                 : 
 5847 tgl                      3884 GIC        3178 :     len1 = VARSIZE_ANY_EXHDR(arg1);
 5847 tgl                      3885 CBC        3178 :     len2 = VARSIZE_ANY_EXHDR(arg2);
                               3886                 : 
                               3887            3178 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
 7909 tgl                      3888 ECB             : 
 7909 tgl                      3889 CBC        3178 :     PG_FREE_IF_COPY(arg1, 0);
 7909 tgl                      3890 GIC        3178 :     PG_FREE_IF_COPY(arg2, 1);
 7909 tgl                      3891 ECB             : 
 7909 tgl                      3892 GIC        3178 :     PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
                               3893                 : }
 7909 tgl                      3894 ECB             : 
                               3895                 : Datum
 7909 tgl                      3896 CBC        3114 : byteagt(PG_FUNCTION_ARGS)
                               3897                 : {
 5847                          3898            3114 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
 5847 tgl                      3899 GIC        3114 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
                               3900                 :     int         len1,
 7909 tgl                      3901 ECB             :                 len2;
                               3902                 :     int         cmp;
                               3903                 : 
 5847 tgl                      3904 GIC        3114 :     len1 = VARSIZE_ANY_EXHDR(arg1);
                               3905            3114 :     len2 = VARSIZE_ANY_EXHDR(arg2);
                               3906                 : 
                               3907            3114 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
                               3908                 : 
 7909                          3909            3114 :     PG_FREE_IF_COPY(arg1, 0);
                               3910            3114 :     PG_FREE_IF_COPY(arg2, 1);
                               3911                 : 
 7909 tgl                      3912 CBC        3114 :     PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
 7909 tgl                      3913 ECB             : }
                               3914                 : 
                               3915                 : Datum
 7909 tgl                      3916 CBC        2505 : byteage(PG_FUNCTION_ARGS)
                               3917                 : {
 5847 tgl                      3918 GIC        2505 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
                               3919            2505 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
                               3920                 :     int         len1,
                               3921                 :                 len2;
                               3922                 :     int         cmp;
                               3923                 : 
                               3924            2505 :     len1 = VARSIZE_ANY_EXHDR(arg1);
 5847 tgl                      3925 CBC        2505 :     len2 = VARSIZE_ANY_EXHDR(arg2);
 7909 tgl                      3926 ECB             : 
 5847 tgl                      3927 CBC        2505 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
 7909 tgl                      3928 ECB             : 
 7909 tgl                      3929 CBC        2505 :     PG_FREE_IF_COPY(arg1, 0);
 7909 tgl                      3930 GIC        2505 :     PG_FREE_IF_COPY(arg2, 1);
                               3931                 : 
 7909 tgl                      3932 CBC        2505 :     PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
                               3933                 : }
                               3934                 : 
                               3935                 : Datum
 7909 tgl                      3936 GIC       43878 : byteacmp(PG_FUNCTION_ARGS)
                               3937                 : {
 5847                          3938           43878 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
                               3939           43878 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
                               3940                 :     int         len1,
                               3941                 :                 len2;
                               3942                 :     int         cmp;
                               3943                 : 
                               3944           43878 :     len1 = VARSIZE_ANY_EXHDR(arg1);
                               3945           43878 :     len2 = VARSIZE_ANY_EXHDR(arg2);
                               3946                 : 
                               3947           43878 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
 7909 tgl                      3948 CBC       43878 :     if ((cmp == 0) && (len1 != len2))
 7909 tgl                      3949 GIC        7326 :         cmp = (len1 < len2) ? -1 : 1;
                               3950                 : 
                               3951           43878 :     PG_FREE_IF_COPY(arg1, 0);
                               3952           43878 :     PG_FREE_IF_COPY(arg2, 1);
                               3953                 : 
                               3954           43878 :     PG_RETURN_INT32(cmp);
 7909 tgl                      3955 ECB             : }
 7535 bruce                    3956                 : 
                               3957                 : Datum
 2622 rhaas                    3958 GIC          13 : bytea_sortsupport(PG_FUNCTION_ARGS)
 2622 rhaas                    3959 ECB             : {
 2622 rhaas                    3960 GIC          13 :     SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
                               3961                 :     MemoryContext oldcontext;
                               3962                 : 
                               3963              13 :     oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
                               3964                 : 
                               3965                 :     /* Use generic string SortSupport, forcing "C" collation */
 1572 tgl                      3966 CBC          13 :     varstr_sortsupport(ssup, BYTEAOID, C_COLLATION_OID);
                               3967                 : 
 2622 rhaas                    3968 GIC          13 :     MemoryContextSwitchTo(oldcontext);
 2622 rhaas                    3969 ECB             : 
 2622 rhaas                    3970 CBC          13 :     PG_RETURN_VOID();
                               3971                 : }
                               3972                 : 
 6488 bruce                    3973 ECB             : /*
                               3974                 :  * appendStringInfoText
                               3975                 :  *
                               3976                 :  * Append a text to str.
                               3977                 :  * Like appendStringInfoString(str, text_to_cstring(t)) but faster.
                               3978                 :  */
                               3979                 : static void
 6488 bruce                    3980 CBC      832552 : appendStringInfoText(StringInfo str, const text *t)
                               3981                 : {
 5678 tgl                      3982 GIC      832552 :     appendBinaryStringInfo(str, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
 6488 bruce                    3983          832552 : }
 6488 bruce                    3984 ECB             : 
                               3985                 : /*
                               3986                 :  * replace_text
 7335 tgl                      3987                 :  * replace all occurrences of 'old_sub_str' in 'orig_str'
 7535 bruce                    3988                 :  * with 'new_sub_str' to form 'new_str'
                               3989                 :  *
                               3990                 :  * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
                               3991                 :  * otherwise returns 'new_str'
                               3992                 :  */
                               3993                 : Datum
 7535 bruce                    3994 CBC        1275 : replace_text(PG_FUNCTION_ARGS)
                               3995                 : {
 5678 tgl                      3996            1275 :     text       *src_text = PG_GETARG_TEXT_PP(0);
 5678 tgl                      3997 GIC        1275 :     text       *from_sub_text = PG_GETARG_TEXT_PP(1);
                               3998            1275 :     text       *to_sub_text = PG_GETARG_TEXT_PP(2);
                               3999                 :     int         src_text_len;
                               4000                 :     int         from_sub_text_len;
                               4001                 :     TextPositionState state;
                               4002                 :     text       *ret_text;
                               4003                 :     int         chunk_len;
                               4004                 :     char       *curr_ptr;
 5996 tgl                      4005 ECB             :     char       *start_ptr;
 6248 neilc                    4006                 :     StringInfoData str;
                               4007                 :     bool        found;
 7535 bruce                    4008                 : 
 1535 heikki.linnakangas       4009 GIC        1275 :     src_text_len = VARSIZE_ANY_EXHDR(src_text);
                               4010            1275 :     from_sub_text_len = VARSIZE_ANY_EXHDR(from_sub_text);
                               4011                 : 
 5743 tgl                      4012 EUB             :     /* Return unmodified source string if empty source or pattern */
 5743 tgl                      4013 GBC        1275 :     if (src_text_len < 1 || from_sub_text_len < 1)
                               4014                 :     {
 5743 tgl                      4015 UIC           0 :         PG_RETURN_TEXT_P(src_text);
                               4016                 :     }
                               4017                 : 
 1479 peter                    4018 GIC        1275 :     text_position_setup(src_text, from_sub_text, PG_GET_COLLATION(), &state);
                               4019                 : 
 1535 heikki.linnakangas       4020            1275 :     found = text_position_next(&state);
 6488 bruce                    4021 ECB             : 
                               4022                 :     /* When the from_sub_text is not found, there is nothing to do. */
 1535 heikki.linnakangas       4023 GIC        1275 :     if (!found)
                               4024                 :     {
 6028 tgl                      4025             390 :         text_position_cleanup(&state);
 6488 bruce                    4026             390 :         PG_RETURN_TEXT_P(src_text);
                               4027                 :     }
 1535 heikki.linnakangas       4028             885 :     curr_ptr = text_position_get_match_ptr(&state);
 5678 tgl                      4029 CBC         885 :     start_ptr = VARDATA_ANY(src_text);
 5996 tgl                      4030 ECB             : 
 6248 neilc                    4031 GBC         885 :     initStringInfo(&str);
 7535 bruce                    4032 ECB             : 
                               4033                 :     do
                               4034                 :     {
 5743 tgl                      4035 GIC        3119 :         CHECK_FOR_INTERRUPTS();
                               4036                 : 
                               4037                 :         /* copy the data skipped over by last text_position_next() */
 1535 heikki.linnakangas       4038            3119 :         chunk_len = curr_ptr - start_ptr;
 5996 tgl                      4039 CBC        3119 :         appendBinaryStringInfo(&str, start_ptr, chunk_len);
                               4040                 : 
 6248 neilc                    4041 GIC        3119 :         appendStringInfoText(&str, to_sub_text);
                               4042                 : 
 1535 heikki.linnakangas       4043 CBC        3119 :         start_ptr = curr_ptr + from_sub_text_len;
 5996 tgl                      4044 ECB             : 
 1535 heikki.linnakangas       4045 CBC        3119 :         found = text_position_next(&state);
 1535 heikki.linnakangas       4046 GIC        3119 :         if (found)
                               4047            2234 :             curr_ptr = text_position_get_match_ptr(&state);
                               4048                 :     }
                               4049            3119 :     while (found);
                               4050                 : 
 5996 tgl                      4051 ECB             :     /* copy trailing data */
 5678 tgl                      4052 CBC         885 :     chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
 5996 tgl                      4053 GIC         885 :     appendBinaryStringInfo(&str, start_ptr, chunk_len);
                               4054                 : 
 6028                          4055             885 :     text_position_cleanup(&state);
                               4056                 : 
 5493                          4057             885 :     ret_text = cstring_to_text_with_len(str.data, str.len);
 6248 neilc                    4058 CBC         885 :     pfree(str.data);
 7535 bruce                    4059 ECB             : 
 7535 bruce                    4060 GIC         885 :     PG_RETURN_TEXT_P(ret_text);
                               4061                 : }
 7535 bruce                    4062 ECB             : 
                               4063                 : /*
                               4064                 :  * check_replace_text_has_escape
 6382 tgl                      4065                 :  *
  608                          4066                 :  * Returns 0 if text contains no backslashes that need processing.
                               4067                 :  * Returns 1 if text contains backslashes, but not regexp submatch specifiers.
                               4068                 :  * Returns 2 if text contains regexp submatch specifiers (\1 .. \9).
                               4069                 :  */
                               4070                 : static int
  608 tgl                      4071 GIC        5263 : check_replace_text_has_escape(const text *replace_text)
 6482 bruce                    4072 ECB             : {
  608 tgl                      4073 CBC        5263 :     int         result = 0;
 5678 tgl                      4074 GIC        5263 :     const char *p = VARDATA_ANY(replace_text);
                               4075            5263 :     const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
                               4076                 : 
  608                          4077           10548 :     while (p < p_end)
                               4078                 :     {
                               4079                 :         /* Find next escape char, if any. */
                               4080            4635 :         p = memchr(p, '\\', p_end - p);
  608 tgl                      4081 CBC        4635 :         if (p == NULL)
                               4082            4296 :             break;
                               4083             339 :         p++;
                               4084                 :         /* Note: a backslash at the end doesn't require extra processing. */
  608 tgl                      4085 GIC         339 :         if (p < p_end)
                               4086                 :         {
                               4087             339 :             if (*p >= '1' && *p <= '9')
                               4088             317 :                 return 2;       /* Found a submatch specifier, so done */
  608 tgl                      4089 CBC          22 :             result = 1;         /* Found some other sequence, keep looking */
  608 tgl                      4090 GIC          22 :             p++;
                               4091                 :         }
                               4092                 :     }
  608 tgl                      4093 CBC        4946 :     return result;
 6482 bruce                    4094 ECB             : }
                               4095                 : 
                               4096                 : /*
                               4097                 :  * appendStringInfoRegexpSubstr
 6382 tgl                      4098                 :  *
                               4099                 :  * Append replace_text to str, substituting regexp back references for
                               4100                 :  * \n escapes.  start_ptr is the start of the match in the source string,
 5996                          4101                 :  * at logical character position data_pos.
                               4102                 :  */
                               4103                 : static void
 6482 bruce                    4104 GIC         106 : appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
                               4105                 :                              regmatch_t *pmatch,
                               4106                 :                              char *start_ptr, int data_pos)
                               4107                 : {
 5678 tgl                      4108             106 :     const char *p = VARDATA_ANY(replace_text);
                               4109             106 :     const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
 6482 bruce                    4110 ECB             : 
  608 tgl                      4111 GIC         263 :     while (p < p_end)
 6482 bruce                    4112 ECB             :     {
 6382 tgl                      4113 CBC         235 :         const char *chunk_start = p;
 6382 tgl                      4114 ECB             :         int         so;
                               4115                 :         int         eo;
                               4116                 : 
                               4117                 :         /* Find next escape char, if any. */
  608 tgl                      4118 GIC         235 :         p = memchr(p, '\\', p_end - p);
                               4119             235 :         if (p == NULL)
                               4120              75 :             p = p_end;
                               4121                 : 
                               4122                 :         /* Copy the text we just scanned over, if any. */
 6382                          4123             235 :         if (p > chunk_start)
 6382 tgl                      4124 CBC         147 :             appendBinaryStringInfo(str, chunk_start, p - chunk_start);
 6482 bruce                    4125 ECB             : 
                               4126                 :         /* Done if at end of string, else advance over escape char. */
 6382 tgl                      4127 GIC         235 :         if (p >= p_end)
 6482 bruce                    4128              75 :             break;
 6482 bruce                    4129 CBC         160 :         p++;
 6382 tgl                      4130 ECB             : 
 6382 tgl                      4131 GIC         160 :         if (p >= p_end)
                               4132                 :         {
 6382 tgl                      4133 ECB             :             /* Escape at very end of input.  Treat same as unexpected char */
 6382 tgl                      4134 CBC           3 :             appendStringInfoChar(str, '\\');
 6382 tgl                      4135 GIC           3 :             break;
                               4136                 :         }
 6482 bruce                    4137 ECB             : 
 6482 bruce                    4138 GIC         157 :         if (*p >= '1' && *p <= '9')
                               4139             127 :         {
 6482 bruce                    4140 ECB             :             /* Use the back reference of regexp. */
 6385 bruce                    4141 CBC         127 :             int         idx = *p - '0';
                               4142                 : 
 6482                          4143             127 :             so = pmatch[idx].rm_so;
 6482 bruce                    4144 GIC         127 :             eo = pmatch[idx].rm_eo;
                               4145             127 :             p++;
                               4146                 :         }
 6482 bruce                    4147 CBC          30 :         else if (*p == '&')
                               4148                 :         {
 6482 bruce                    4149 ECB             :             /* Use the entire matched string. */
 6482 bruce                    4150 GIC           9 :             so = pmatch[0].rm_so;
                               4151               9 :             eo = pmatch[0].rm_eo;
 6482 bruce                    4152 CBC           9 :             p++;
                               4153                 :         }
 6382 tgl                      4154              21 :         else if (*p == '\\')
                               4155                 :         {
 6382 tgl                      4156 ECB             :             /* \\ means transfer one \ to output. */
 6382 tgl                      4157 CBC          18 :             appendStringInfoChar(str, '\\');
 6382 tgl                      4158 GIC          18 :             p++;
 6382 tgl                      4159 CBC          18 :             continue;
                               4160                 :         }
                               4161                 :         else
                               4162                 :         {
                               4163                 :             /*
                               4164                 :              * If escape char is not followed by any expected char, just treat
                               4165                 :              * it as ordinary data to copy.  (XXX would it be better to throw
 6347 bruce                    4166 ECB             :              * an error?)
                               4167                 :              */
 6382 tgl                      4168 GIC           3 :             appendStringInfoChar(str, '\\');
 6382 tgl                      4169 CBC           3 :             continue;
                               4170                 :         }
 6482 bruce                    4171 ECB             : 
  608 tgl                      4172 CBC         136 :         if (so >= 0 && eo >= 0)
                               4173                 :         {
                               4174                 :             /*
 3260 bruce                    4175 ECB             :              * Copy the text that is back reference of regexp.  Note so and eo
                               4176                 :              * are counted in characters not bytes.
 6382 tgl                      4177                 :              */
 5996                          4178                 :             char       *chunk_start;
                               4179                 :             int         chunk_len;
                               4180                 : 
 5996 tgl                      4181 GIC         136 :             Assert(so >= data_pos);
                               4182             136 :             chunk_start = start_ptr;
                               4183             136 :             chunk_start += charlen_to_bytelen(chunk_start, so - data_pos);
                               4184             136 :             chunk_len = charlen_to_bytelen(chunk_start, eo - so);
 5996 tgl                      4185 CBC         136 :             appendBinaryStringInfo(str, chunk_start, chunk_len);
                               4186                 :         }
                               4187                 :     }
 6482 bruce                    4188             106 : }
                               4189                 : 
 6482 bruce                    4190 ECB             : /*
                               4191                 :  * replace_text_regexp
                               4192                 :  *
                               4193                 :  * replace substring(s) in src_text that match pattern with replace_text.
                               4194                 :  * The replace_text can contain backslash markers to substitute
  608 tgl                      4195                 :  * (parts of) the matched text.
  614                          4196                 :  *
  608                          4197                 :  * cflags: regexp compile flags.
                               4198                 :  * collation: collation to use.
                               4199                 :  * search_start: the character (not byte) offset in src_text at which to
                               4200                 :  * begin searching.
  614                          4201                 :  * n: if 0, replace all matches; if > 0, replace only the N'th match.
 6482 bruce                    4202                 :  */
                               4203                 : text *
  608 tgl                      4204 CBC        5263 : replace_text_regexp(text *src_text, text *pattern_text,
                               4205                 :                     text *replace_text,
                               4206                 :                     int cflags, Oid collation,
  614 tgl                      4207 ECB             :                     int search_start, int n)
 6482 bruce                    4208                 : {
                               4209                 :     text       *ret_text;
  608 tgl                      4210                 :     regex_t    *re;
 5678 tgl                      4211 GIC        5263 :     int         src_text_len = VARSIZE_ANY_EXHDR(src_text);
  614                          4212            5263 :     int         nmatches = 0;
 6031 bruce                    4213 ECB             :     StringInfoData buf;
                               4214                 :     regmatch_t  pmatch[10];     /* main match, plus \1 to \9 */
  608 tgl                      4215 CBC        5263 :     int         nmatch = lengthof(pmatch);
                               4216                 :     pg_wchar   *data;
                               4217                 :     size_t      data_len;
                               4218                 :     int         data_pos;
 5996 tgl                      4219 ECB             :     char       *start_ptr;
                               4220                 :     int         escape_status;
 6482 bruce                    4221                 : 
 6248 neilc                    4222 GIC        5263 :     initStringInfo(&buf);
 6248 neilc                    4223 ECB             : 
                               4224                 :     /* Convert data string to wide characters. */
 6482 bruce                    4225 GIC        5263 :     data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
 5678 tgl                      4226            5263 :     data_len = pg_mb2wchar_with_len(VARDATA_ANY(src_text), data, src_text_len);
 6482 bruce                    4227 ECB             : 
                               4228                 :     /* Check whether replace_text has escapes, especially regexp submatches. */
  608 tgl                      4229 GIC        5263 :     escape_status = check_replace_text_has_escape(replace_text);
                               4230                 : 
                               4231                 :     /* If no regexp submatches, we can use REG_NOSUB. */
  608 tgl                      4232 CBC        5263 :     if (escape_status < 2)
                               4233                 :     {
  608 tgl                      4234 GIC        4946 :         cflags |= REG_NOSUB;
  608 tgl                      4235 ECB             :         /* Also tell pg_regexec we only want the whole-match location. */
  608 tgl                      4236 GIC        4946 :         nmatch = 1;
                               4237                 :     }
                               4238                 : 
                               4239                 :     /* Prepare the regexp. */
                               4240            5263 :     re = RE_compile_and_cache(pattern_text, cflags, collation);
                               4241                 : 
 5996 tgl                      4242 ECB             :     /* start_ptr points to the data_pos'th character of src_text */
 5678 tgl                      4243 GIC        5263 :     start_ptr = (char *) VARDATA_ANY(src_text);
 5996 tgl                      4244 CBC        5263 :     data_pos = 0;
                               4245                 : 
 5996 tgl                      4246 GIC        7251 :     while (search_start <= data_len)
                               4247                 :     {
                               4248                 :         int         regexec_result;
                               4249                 : 
                               4250            7248 :         CHECK_FOR_INTERRUPTS();
                               4251                 : 
 6482 bruce                    4252            7248 :         regexec_result = pg_regexec(re,
                               4253                 :                                     data,
                               4254                 :                                     data_len,
                               4255                 :                                     search_start,
 2118 tgl                      4256 ECB             :                                     NULL,   /* no details */
                               4257                 :                                     nmatch,
                               4258                 :                                     pmatch,
                               4259                 :                                     0);
                               4260                 : 
 6248 neilc                    4261 CBC        7248 :         if (regexec_result == REG_NOMATCH)
 6248 neilc                    4262 GIC        4583 :             break;
 6248 neilc                    4263 ECB             : 
 6248 neilc                    4264 CBC        2665 :         if (regexec_result != REG_OKAY)
                               4265                 :         {
 6385 bruce                    4266 ECB             :             char        errMsg[100];
 6482                          4267                 : 
 6482 bruce                    4268 LBC           0 :             pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
 6482 bruce                    4269 UIC           0 :             ereport(ERROR,
                               4270                 :                     (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
                               4271                 :                      errmsg("regular expression failed: %s", errMsg)));
                               4272                 :         }
                               4273                 : 
                               4274                 :         /*
                               4275                 :          * Count matches, and decide whether to replace this match.
                               4276                 :          */
  614 tgl                      4277 GIC        2665 :         nmatches++;
                               4278            2665 :         if (n > 0 && nmatches != n)
                               4279                 :         {
                               4280                 :             /*
  614 tgl                      4281 ECB             :              * No, so advance search_start, but not start_ptr/data_pos. (Thus,
                               4282                 :              * we treat the matched text as if it weren't matched, and copy it
                               4283                 :              * to the output later.)
                               4284                 :              */
  614 tgl                      4285 GIC          30 :             search_start = pmatch[0].rm_eo;
                               4286              30 :             if (pmatch[0].rm_so == pmatch[0].rm_eo)
  614 tgl                      4287 UIC           0 :                 search_start++;
  614 tgl                      4288 GIC          30 :             continue;
                               4289                 :         }
                               4290                 : 
                               4291                 :         /*
 5624 bruce                    4292 ECB             :          * Copy the text to the left of the match position.  Note we are given
                               4293                 :          * character not byte indexes.
 6385                          4294                 :          */
 6482 bruce                    4295 GIC        2635 :         if (pmatch[0].rm_so - data_pos > 0)
                               4296                 :         {
 5996 tgl                      4297 ECB             :             int         chunk_len;
                               4298                 : 
 5996 tgl                      4299 CBC        2550 :             chunk_len = charlen_to_bytelen(start_ptr,
                               4300            2550 :                                            pmatch[0].rm_so - data_pos);
 5996 tgl                      4301 GIC        2550 :             appendBinaryStringInfo(&buf, start_ptr, chunk_len);
 5624 bruce                    4302 ECB             : 
                               4303                 :             /*
                               4304                 :              * Advance start_ptr over that text, to avoid multiple rescans of
                               4305                 :              * it if the replace_text contains multiple back-references.
                               4306                 :              */
 5996 tgl                      4307 GIC        2550 :             start_ptr += chunk_len;
                               4308            2550 :             data_pos = pmatch[0].rm_so;
                               4309                 :         }
                               4310                 : 
                               4311                 :         /*
                               4312                 :          * Copy the replace_text, processing escapes if any are present.
                               4313                 :          */
  608                          4314            2635 :         if (escape_status > 0)
 5996                          4315             106 :             appendStringInfoRegexpSubstr(&buf, replace_text, pmatch,
 5996 tgl                      4316 ECB             :                                          start_ptr, data_pos);
                               4317                 :         else
 6248 neilc                    4318 CBC        2529 :             appendStringInfoText(&buf, replace_text);
                               4319                 : 
                               4320                 :         /* Advance start_ptr and data_pos over the matched text. */
 5996 tgl                      4321 GIC        5270 :         start_ptr += charlen_to_bytelen(start_ptr,
                               4322            2635 :                                         pmatch[0].rm_eo - data_pos);
                               4323            2635 :         data_pos = pmatch[0].rm_eo;
                               4324                 : 
                               4325                 :         /*
                               4326                 :          * If we only want to replace one occurrence, we're done.
                               4327                 :          */
  614                          4328            2635 :         if (n > 0)
 6482 bruce                    4329             677 :             break;
                               4330                 : 
                               4331                 :         /*
 3539 tgl                      4332 ECB             :          * Advance search position.  Normally we start the next search at the
                               4333                 :          * end of the previous match; but if the match was of zero length, we
                               4334                 :          * have to advance by one character, or we'd just find the same match
                               4335                 :          * again.
                               4336                 :          */
 5996 tgl                      4337 CBC        1958 :         search_start = data_pos;
 6482 bruce                    4338 GIC        1958 :         if (pmatch[0].rm_so == pmatch[0].rm_eo)
                               4339               6 :             search_start++;
                               4340                 :     }
                               4341                 : 
                               4342                 :     /*
                               4343                 :      * Copy the text to the right of the last match.
 6482 bruce                    4344 ECB             :      */
 6482 bruce                    4345 CBC        5263 :     if (data_pos < data_len)
                               4346                 :     {
 5996 tgl                      4347 ECB             :         int         chunk_len;
                               4348                 : 
 5678 tgl                      4349 GIC        5032 :         chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
 5996 tgl                      4350 CBC        5032 :         appendBinaryStringInfo(&buf, start_ptr, chunk_len);
 6482 bruce                    4351 ECB             :     }
                               4352                 : 
 5493 tgl                      4353 CBC        5263 :     ret_text = cstring_to_text_with_len(buf.data, buf.len);
 6248 neilc                    4354 GIC        5263 :     pfree(buf.data);
 6482 bruce                    4355            5263 :     pfree(data);
 6482 bruce                    4356 ECB             : 
 6382 tgl                      4357 CBC        5263 :     return ret_text;
                               4358                 : }
 6482 bruce                    4359 ECB             : 
                               4360                 : /*
  949 tgl                      4361                 :  * split_part
                               4362                 :  * parse input string based on provided field separator
                               4363                 :  * return N'th item (1 based, negative counts from end)
                               4364                 :  */
                               4365                 : Datum
  949 tgl                      4366 GIC          51 : split_part(PG_FUNCTION_ARGS)
                               4367                 : {
 5678                          4368              51 :     text       *inputstring = PG_GETARG_TEXT_PP(0);
 5678 tgl                      4369 CBC          51 :     text       *fldsep = PG_GETARG_TEXT_PP(1);
 7535 bruce                    4370              51 :     int         fldnum = PG_GETARG_INT32(2);
                               4371                 :     int         inputstring_len;
                               4372                 :     int         fldsep_len;
 6028 tgl                      4373 ECB             :     TextPositionState state;
 1535 heikki.linnakangas       4374                 :     char       *start_ptr;
                               4375                 :     char       *end_ptr;
                               4376                 :     text       *result_text;
                               4377                 :     bool        found;
                               4378                 : 
 7008 tgl                      4379                 :     /* field number is 1 based */
  877 tgl                      4380 GIC          51 :     if (fldnum == 0)
 7008 tgl                      4381 CBC           3 :         ereport(ERROR,
                               4382                 :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                               4383                 :                  errmsg("field position must not be zero")));
 7008 tgl                      4384 ECB             : 
 1535 heikki.linnakangas       4385 GIC          48 :     inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
 1535 heikki.linnakangas       4386 CBC          48 :     fldsep_len = VARSIZE_ANY_EXHDR(fldsep);
                               4387                 : 
                               4388                 :     /* return empty string for empty input string */
 7535 bruce                    4389              48 :     if (inputstring_len < 1)
 5493 tgl                      4390 GIC           6 :         PG_RETURN_TEXT_P(cstring_to_text(""));
                               4391                 : 
                               4392                 :     /* handle empty field separator */
 7535 bruce                    4393              42 :     if (fldsep_len < 1)
 7535 bruce                    4394 ECB             :     {
                               4395                 :         /* if first or last field, return input string, else empty string */
  877 tgl                      4396 CBC          12 :         if (fldnum == 1 || fldnum == -1)
 7535 bruce                    4397               6 :             PG_RETURN_TEXT_P(inputstring);
                               4398                 :         else
 5493 tgl                      4399 GIC           6 :             PG_RETURN_TEXT_P(cstring_to_text(""));
 7535 bruce                    4400 ECB             :     }
                               4401                 : 
                               4402                 :     /* find the first field separator */
 1479 peter                    4403 GIC          30 :     text_position_setup(inputstring, fldsep, PG_GET_COLLATION(), &state);
                               4404                 : 
 1535 heikki.linnakangas       4405              30 :     found = text_position_next(&state);
 7535 bruce                    4406 ECB             : 
 6028 tgl                      4407                 :     /* special case if fldsep not found at all */
 1535 heikki.linnakangas       4408 GIC          30 :     if (!found)
                               4409                 :     {
 6028 tgl                      4410               6 :         text_position_cleanup(&state);
  877 tgl                      4411 ECB             :         /* if first or last field, return input string, else empty string */
  877 tgl                      4412 GIC           6 :         if (fldnum == 1 || fldnum == -1)
 7535 bruce                    4413               3 :             PG_RETURN_TEXT_P(inputstring);
 7188 bruce                    4414 ECB             :         else
 5493 tgl                      4415 GIC           3 :             PG_RETURN_TEXT_P(cstring_to_text(""));
                               4416                 :     }
  877 tgl                      4417 ECB             : 
                               4418                 :     /*
                               4419                 :      * take care of a negative field number (i.e. count from the right) by
                               4420                 :      * converting to a positive field number; we need total number of fields
                               4421                 :      */
  877 tgl                      4422 CBC          24 :     if (fldnum < 0)
                               4423                 :     {
                               4424                 :         /* we found a fldsep, so there are at least two fields */
                               4425              12 :         int         numfields = 2;
                               4426                 : 
  877 tgl                      4427 GIC          18 :         while (text_position_next(&state))
                               4428               6 :             numfields++;
                               4429                 : 
                               4430                 :         /* special case of last field does not require an extra pass */
                               4431              12 :         if (fldnum == -1)
                               4432                 :         {
                               4433               3 :             start_ptr = text_position_get_match_ptr(&state) + fldsep_len;
  877 tgl                      4434 CBC           3 :             end_ptr = VARDATA_ANY(inputstring) + inputstring_len;
  877 tgl                      4435 GIC           3 :             text_position_cleanup(&state);
  877 tgl                      4436 CBC           3 :             PG_RETURN_TEXT_P(cstring_to_text_with_len(start_ptr,
                               4437                 :                                                       end_ptr - start_ptr));
  877 tgl                      4438 ECB             :         }
                               4439                 : 
                               4440                 :         /* else, convert fldnum to positive notation */
  877 tgl                      4441 GIC           9 :         fldnum += numfields + 1;
  877 tgl                      4442 ECB             : 
                               4443                 :         /* if nonexistent field, return empty string */
  877 tgl                      4444 GIC           9 :         if (fldnum <= 0)
  877 tgl                      4445 ECB             :         {
  877 tgl                      4446 GIC           3 :             text_position_cleanup(&state);
                               4447               3 :             PG_RETURN_TEXT_P(cstring_to_text(""));
  877 tgl                      4448 ECB             :         }
                               4449                 : 
                               4450                 :         /* reset to pointing at first match, but now with positive fldnum */
  877 tgl                      4451 CBC           6 :         text_position_reset(&state);
  877 tgl                      4452 GIC           6 :         found = text_position_next(&state);
  877 tgl                      4453 CBC           6 :         Assert(found);
  877 tgl                      4454 ECB             :     }
                               4455                 : 
                               4456                 :     /* identify bounds of first field */
  877 tgl                      4457 GIC          18 :     start_ptr = VARDATA_ANY(inputstring);
 1535 heikki.linnakangas       4458 CBC          18 :     end_ptr = text_position_get_match_ptr(&state);
                               4459                 : 
 1535 heikki.linnakangas       4460 GIC          33 :     while (found && --fldnum > 0)
                               4461                 :     {
                               4462                 :         /* identify bounds of next field */
                               4463              15 :         start_ptr = end_ptr + fldsep_len;
                               4464              15 :         found = text_position_next(&state);
                               4465              15 :         if (found)
                               4466               9 :             end_ptr = text_position_get_match_ptr(&state);
                               4467                 :     }
 6028 tgl                      4468 ECB             : 
 6028 tgl                      4469 GIC          18 :     text_position_cleanup(&state);
                               4470                 : 
                               4471              18 :     if (fldnum > 0)
                               4472                 :     {
 6028 tgl                      4473 ECB             :         /* N'th field separator not found */
                               4474                 :         /* if last field requested, return it, else empty string */
 6028 tgl                      4475 CBC           6 :         if (fldnum == 1)
 1535 heikki.linnakangas       4476 ECB             :         {
 1535 heikki.linnakangas       4477 GIC           3 :             int         last_len = start_ptr - VARDATA_ANY(inputstring);
 1535 heikki.linnakangas       4478 ECB             : 
 1535 heikki.linnakangas       4479 GIC           3 :             result_text = cstring_to_text_with_len(start_ptr,
                               4480                 :                                                    inputstring_len - last_len);
                               4481                 :         }
                               4482                 :         else
 5493 tgl                      4483 CBC           3 :             result_text = cstring_to_text("");
 7535 bruce                    4484 ECB             :     }
                               4485                 :     else
                               4486                 :     {
                               4487                 :         /* non-last field requested */
 1535 heikki.linnakangas       4488 GIC          12 :         result_text = cstring_to_text_with_len(start_ptr, end_ptr - start_ptr);
                               4489                 :     }
                               4490                 : 
 6028 tgl                      4491              18 :     PG_RETURN_TEXT_P(result_text);
                               4492                 : }
 7535 bruce                    4493 ECB             : 
                               4494                 : /*
                               4495                 :  * Convenience function to return true when two text params are equal.
                               4496                 :  */
                               4497                 : static bool
 1479 peter                    4498 GIC         174 : text_isequal(text *txt1, text *txt2, Oid collid)
 4625 tgl                      4499 ECB             : {
 1479 peter                    4500 GIC         174 :     return DatumGetBool(DirectFunctionCall2Coll(texteq,
                               4501                 :                                                 collid,
                               4502                 :                                                 PointerGetDatum(txt1),
                               4503                 :                                                 PointerGetDatum(txt2)));
                               4504                 : }
                               4505                 : 
                               4506                 : /*
 7226 tgl                      4507 ECB             :  * text_to_array
                               4508                 :  * parse input string and return text array of elements,
                               4509                 :  * based on provided field separator
                               4510                 :  */
                               4511                 : Datum
 7226 tgl                      4512 CBC          73 : text_to_array(PG_FUNCTION_ARGS)
                               4513                 : {
                               4514                 :     SplitTextOutputData tstate;
                               4515                 : 
                               4516                 :     /* For array output, tstate should start as all zeroes */
  949 tgl                      4517 GIC          73 :     memset(&tstate, 0, sizeof(tstate));
                               4518                 : 
                               4519              73 :     if (!split_text(fcinfo, &tstate))
                               4520               3 :         PG_RETURN_NULL();
                               4521                 : 
                               4522              64 :     if (tstate.astate == NULL)
  949 tgl                      4523 CBC           3 :         PG_RETURN_ARRAYTYPE_P(construct_empty_array(TEXTOID));
                               4524                 : 
  224 peter                    4525 GNC          61 :     PG_RETURN_DATUM(makeArrayResult(tstate.astate,
                               4526                 :                                           CurrentMemoryContext));
                               4527                 : }
                               4528                 : 
                               4529                 : /*
 4625 tgl                      4530 ECB             :  * text_to_array_null
 4625 tgl                      4531 EUB             :  * parse input string and return text array of elements,
                               4532                 :  * based on provided field separator and null string
 4625 tgl                      4533 ECB             :  *
                               4534                 :  * This is a separate entry point only to prevent the regression tests from
                               4535                 :  * complaining about different argument sets for the same internal function.
                               4536                 :  */
                               4537                 : Datum
 4625 tgl                      4538 CBC          30 : text_to_array_null(PG_FUNCTION_ARGS)
                               4539                 : {
  949                          4540              30 :     return text_to_array(fcinfo);
                               4541                 : }
  949 tgl                      4542 ECB             : 
                               4543                 : /*
                               4544                 :  * text_to_table
                               4545                 :  * parse input string and return table of elements,
                               4546                 :  * based on provided field separator
                               4547                 :  */
                               4548                 : Datum
  949 tgl                      4549 CBC          42 : text_to_table(PG_FUNCTION_ARGS)
                               4550                 : {
  949 tgl                      4551 GIC          42 :     ReturnSetInfo *rsi = (ReturnSetInfo *) fcinfo->resultinfo;
                               4552                 :     SplitTextOutputData tstate;
                               4553                 : 
                               4554              42 :     tstate.astate = NULL;
  173 michael                  4555              42 :     InitMaterializedSRF(fcinfo, MAT_SRF_USE_EXPECTED_DESC);
  398                          4556              42 :     tstate.tupstore = rsi->setResult;
                               4557              42 :     tstate.tupdesc = rsi->setDesc;
                               4558                 : 
  949 tgl                      4559              42 :     (void) split_text(fcinfo, &tstate);
                               4560                 : 
  949 tgl                      4561 CBC          42 :     return (Datum) 0;
                               4562                 : }
                               4563                 : 
                               4564                 : /*
                               4565                 :  * text_to_table_null
                               4566                 :  * parse input string and return table of elements,
                               4567                 :  * based on provided field separator and null string
  949 tgl                      4568 ECB             :  *
                               4569                 :  * This is a separate entry point only to prevent the regression tests from
                               4570                 :  * complaining about different argument sets for the same internal function.
                               4571                 :  */
                               4572                 : Datum
  949 tgl                      4573 CBC          12 : text_to_table_null(PG_FUNCTION_ARGS)
  949 tgl                      4574 ECB             : {
  949 tgl                      4575 GIC          12 :     return text_to_table(fcinfo);
 4625 tgl                      4576 ECB             : }
                               4577                 : 
                               4578                 : /*
                               4579                 :  * Common code for text_to_array, text_to_array_null, text_to_table
                               4580                 :  * and text_to_table_null functions.
                               4581                 :  *
                               4582                 :  * These are not strict so we have to test for null inputs explicitly.
                               4583                 :  * Returns false if result is to be null, else returns true.
  949                          4584                 :  *
                               4585                 :  * Note that if the result is valid but empty (zero elements), we return
                               4586                 :  * without changing *tstate --- caller must handle that case, too.
 4625                          4587                 :  */
                               4588                 : static bool
  949 tgl                      4589 CBC         115 : split_text(FunctionCallInfo fcinfo, SplitTextOutputData *tstate)
 4625 tgl                      4590 ECB             : {
                               4591                 :     text       *inputstring;
                               4592                 :     text       *fldsep;
                               4593                 :     text       *null_string;
  949 tgl                      4594 GIC         115 :     Oid         collation = PG_GET_COLLATION();
                               4595                 :     int         inputstring_len;
                               4596                 :     int         fldsep_len;
                               4597                 :     char       *start_ptr;
 7008 tgl                      4598 ECB             :     text       *result_text;
                               4599                 : 
                               4600                 :     /* when input string is NULL, then result is NULL too */
 4625 tgl                      4601 GIC         115 :     if (PG_ARGISNULL(0))
  949 tgl                      4602 CBC           6 :         return false;
 7226 tgl                      4603 ECB             : 
 4625 tgl                      4604 CBC         109 :     inputstring = PG_GETARG_TEXT_PP(0);
                               4605                 : 
 4625 tgl                      4606 ECB             :     /* fldsep can be NULL */
 4625 tgl                      4607 CBC         109 :     if (!PG_ARGISNULL(1))
                               4608              94 :         fldsep = PG_GETARG_TEXT_PP(1);
                               4609                 :     else
                               4610              15 :         fldsep = NULL;
 4625 tgl                      4611 ECB             : 
                               4612                 :     /* null_string can be NULL or omitted */
 4625 tgl                      4613 GIC         109 :     if (PG_NARGS() > 2 && !PG_ARGISNULL(2))
 4625 tgl                      4614 CBC          42 :         null_string = PG_GETARG_TEXT_PP(2);
                               4615                 :     else
 4625 tgl                      4616 GIC          67 :         null_string = NULL;
                               4617                 : 
                               4618             109 :     if (fldsep != NULL)
                               4619                 :     {
 4625 tgl                      4620 ECB             :         /*
                               4621                 :          * Normal case with non-null fldsep.  Use the text_position machinery
                               4622                 :          * to search for occurrences of fldsep.
                               4623                 :          */
                               4624                 :         TextPositionState state;
 4520 peter_e                  4625                 : 
 1535 heikki.linnakangas       4626 CBC          94 :         inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
 1535 heikki.linnakangas       4627 GIC          94 :         fldsep_len = VARSIZE_ANY_EXHDR(fldsep);
 7226 tgl                      4628 EUB             : 
  949 tgl                      4629 ECB             :         /* return empty set for empty input string */
 4625 tgl                      4630 GIC          94 :         if (inputstring_len < 1)
  949                          4631              30 :             return true;
                               4632                 : 
                               4633                 :         /* empty field separator: return input string as a one-element set */
 4625 tgl                      4634 CBC          88 :         if (fldsep_len < 1)
                               4635                 :         {
  949                          4636              24 :             split_text_accum_result(tstate, inputstring,
                               4637                 :                                     null_string, collation);
                               4638              24 :             return true;
 7226 tgl                      4639 ECB             :         }
                               4640                 : 
  949 tgl                      4641 CBC          64 :         text_position_setup(inputstring, fldsep, collation, &state);
 1535 heikki.linnakangas       4642 ECB             : 
 4625 tgl                      4643 GIC          58 :         start_ptr = VARDATA_ANY(inputstring);
 4625 tgl                      4644 ECB             : 
 1535                          4645                 :         for (;;)
 4625 tgl                      4646 GIC         232 :         {
                               4647                 :             bool        found;
                               4648                 :             char       *end_ptr;
 1535 tgl                      4649 ECB             :             int         chunk_len;
                               4650                 : 
 1535 heikki.linnakangas       4651 CBC         290 :             CHECK_FOR_INTERRUPTS();
 5996 tgl                      4652 ECB             : 
 1535 heikki.linnakangas       4653 GIC         290 :             found = text_position_next(&state);
 1535 heikki.linnakangas       4654 GBC         290 :             if (!found)
 4625 tgl                      4655 EUB             :             {
                               4656                 :                 /* fetch last field */
 4625 tgl                      4657 GIC          58 :                 chunk_len = ((char *) inputstring + VARSIZE_ANY(inputstring)) - start_ptr;
 1535                          4658              58 :                 end_ptr = NULL; /* not used, but some compilers complain */
                               4659                 :             }
 4625 tgl                      4660 ECB             :             else
                               4661                 :             {
                               4662                 :                 /* fetch non-last field */
 1535 heikki.linnakangas       4663 CBC         232 :                 end_ptr = text_position_get_match_ptr(&state);
 1535 heikki.linnakangas       4664 GIC         232 :                 chunk_len = end_ptr - start_ptr;
                               4665                 :             }
                               4666                 : 
                               4667                 :             /* build a temp text datum to pass to split_text_accum_result */
 4625 tgl                      4668             290 :             result_text = cstring_to_text_with_len(start_ptr, chunk_len);
                               4669                 : 
                               4670                 :             /* stash away this field */
  949                          4671             290 :             split_text_accum_result(tstate, result_text,
  949 tgl                      4672 ECB             :                                     null_string, collation);
                               4673                 : 
 4625 tgl                      4674 CBC         290 :             pfree(result_text);
                               4675                 : 
 1535 heikki.linnakangas       4676             290 :             if (!found)
 4625 tgl                      4677 GIC          58 :                 break;
                               4678                 : 
 1535 heikki.linnakangas       4679 CBC         232 :             start_ptr = end_ptr + fldsep_len;
 4625 tgl                      4680 ECB             :         }
                               4681                 : 
 4625 tgl                      4682 GIC          58 :         text_position_cleanup(&state);
                               4683                 :     }
 4625 tgl                      4684 ECB             :     else
                               4685                 :     {
 4520 peter_e                  4686                 :         /*
                               4687                 :          * When fldsep is NULL, each character in the input string becomes a
  949 tgl                      4688                 :          * separate element in the result set.  The separator is effectively
                               4689                 :          * the space between characters.
                               4690                 :          */
 4625 tgl                      4691 GIC          15 :         inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
                               4692                 : 
                               4693              15 :         start_ptr = VARDATA_ANY(inputstring);
                               4694                 : 
                               4695             126 :         while (inputstring_len > 0)
 4625 tgl                      4696 ECB             :         {
 4382 bruce                    4697 GIC         111 :             int         chunk_len = pg_mblen(start_ptr);
 7226 tgl                      4698 ECB             : 
 4625 tgl                      4699 GIC         111 :             CHECK_FOR_INTERRUPTS();
 4625 tgl                      4700 ECB             : 
                               4701                 :             /* build a temp text datum to pass to split_text_accum_result */
 4625 tgl                      4702 GIC         111 :             result_text = cstring_to_text_with_len(start_ptr, chunk_len);
 4520 peter_e                  4703 ECB             : 
 4625 tgl                      4704                 :             /* stash away this field */
  949 tgl                      4705 GIC         111 :             split_text_accum_result(tstate, result_text,
                               4706                 :                                     null_string, collation);
                               4707                 : 
 4625 tgl                      4708 CBC         111 :             pfree(result_text);
 4625 tgl                      4709 ECB             : 
 4625 tgl                      4710 CBC         111 :             start_ptr += chunk_len;
 4625 tgl                      4711 GIC         111 :             inputstring_len -= chunk_len;
 4625 tgl                      4712 ECB             :         }
                               4713                 :     }
                               4714                 : 
  949 tgl                      4715 GIC          73 :     return true;
                               4716                 : }
                               4717                 : 
                               4718                 : /*
                               4719                 :  * Add text item to result set (table or array).
                               4720                 :  *
  949 tgl                      4721 ECB             :  * This is also responsible for checking to see if the item matches
                               4722                 :  * the null_string, in which case we should emit NULL instead.
                               4723                 :  */
                               4724                 : static void
  949 tgl                      4725 GIC         425 : split_text_accum_result(SplitTextOutputData *tstate,
                               4726                 :                         text *field_value,
                               4727                 :                         text *null_string,
  949 tgl                      4728 ECB             :                         Oid collation)
                               4729                 : {
  949 tgl                      4730 GIC         425 :     bool        is_null = false;
  949 tgl                      4731 ECB             : 
  949 tgl                      4732 GIC         425 :     if (null_string && text_isequal(field_value, null_string, collation))
  949 tgl                      4733 CBC          30 :         is_null = true;
  949 tgl                      4734 ECB             : 
  949 tgl                      4735 GBC         425 :     if (tstate->tupstore)
                               4736                 :     {
  949 tgl                      4737 ECB             :         Datum       values[1];
                               4738                 :         bool        nulls[1];
                               4739                 : 
  949 tgl                      4740 GIC         114 :         values[0] = PointerGetDatum(field_value);
                               4741             114 :         nulls[0] = is_null;
  949 tgl                      4742 EUB             : 
  949 tgl                      4743 GIC         114 :         tuplestore_putvalues(tstate->tupstore,
  949 tgl                      4744 ECB             :                              tstate->tupdesc,
                               4745                 :                              values,
                               4746                 :                              nulls);
                               4747                 :     }
                               4748                 :     else
  949 tgl                      4749 EUB             :     {
  949 tgl                      4750 GIC         311 :         tstate->astate = accumArrayResult(tstate->astate,
                               4751                 :                                           PointerGetDatum(field_value),
  949 tgl                      4752 EUB             :                                           is_null,
                               4753                 :                                           TEXTOID,
                               4754                 :                                           CurrentMemoryContext);
                               4755                 :     }
 7226 tgl                      4756 GIC         425 : }
 7226 tgl                      4757 EUB             : 
                               4758                 : /*
                               4759                 :  * array_to_text
 7226 tgl                      4760 ECB             :  * concatenate Cstring representation of input array elements
                               4761                 :  * using provided field separator
                               4762                 :  */
                               4763                 : Datum
 7226 tgl                      4764 GIC       30154 : array_to_text(PG_FUNCTION_ARGS)
                               4765                 : {
                               4766           30154 :     ArrayType  *v = PG_GETARG_ARRAYTYPE_P(0);
 5493                          4767           30154 :     char       *fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
 4625 tgl                      4768 ECB             : 
 4625 tgl                      4769 GIC       30154 :     PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, NULL));
                               4770                 : }
                               4771                 : 
                               4772                 : /*
                               4773                 :  * array_to_text_null
                               4774                 :  * concatenate Cstring representation of input array elements
 4625 tgl                      4775 ECB             :  * using provided field separator and null string
                               4776                 :  *
                               4777                 :  * This version is not strict so we have to test for null inputs explicitly.
                               4778                 :  */
                               4779                 : Datum
 4625 tgl                      4780 CBC           6 : array_to_text_null(PG_FUNCTION_ARGS)
 4625 tgl                      4781 ECB             : {
 4625 tgl                      4782 EUB             :     ArrayType  *v;
                               4783                 :     char       *fldsep;
 4625 tgl                      4784 ECB             :     char       *null_string;
                               4785                 : 
                               4786                 :     /* returns NULL when first or second parameter is NULL */
 4625 tgl                      4787 GIC           6 :     if (PG_ARGISNULL(0) || PG_ARGISNULL(1))
 4625 tgl                      4788 UIC           0 :         PG_RETURN_NULL();
 4520 peter_e                  4789 ECB             : 
 4625 tgl                      4790 GIC           6 :     v = PG_GETARG_ARRAYTYPE_P(0);
 4625 tgl                      4791 CBC           6 :     fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
 4625 tgl                      4792 EUB             : 
                               4793                 :     /* NULL null string is passed through as a null pointer */
 4625 tgl                      4794 GIC           6 :     if (!PG_ARGISNULL(2))
 4625 tgl                      4795 CBC           3 :         null_string = text_to_cstring(PG_GETARG_TEXT_PP(2));
 4625 tgl                      4796 ECB             :     else
 4625 tgl                      4797 CBC           3 :         null_string = NULL;
 4625 tgl                      4798 ECB             : 
 4625 tgl                      4799 GIC           6 :     PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, null_string));
                               4800                 : }
 4625 tgl                      4801 ECB             : 
                               4802                 : /*
                               4803                 :  * common code for array_to_text and array_to_text_null functions
                               4804                 :  */
                               4805                 : static text *
 4625 tgl                      4806 CBC       30169 : array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
 3726 tgl                      4807 ECB             :                        const char *fldsep, const char *null_string)
 4625                          4808                 : {
 4625 tgl                      4809 EUB             :     text       *result;
 7188 bruce                    4810                 :     int         nitems,
                               4811                 :                *dims,
                               4812                 :                 ndims;
 7226 tgl                      4813 ECB             :     Oid         element_type;
                               4814                 :     int         typlen;
                               4815                 :     bool        typbyval;
                               4816                 :     char        typalign;
                               4817                 :     StringInfoData buf;
 6351 tgl                      4818 GIC       30169 :     bool        printed = false;
                               4819                 :     char       *p;
                               4820                 :     bits8      *bitmap;
                               4821                 :     int         bitmask;
                               4822                 :     int         i;
                               4823                 :     ArrayMetaState *my_extra;
                               4824                 : 
 7226                          4825           30169 :     ndims = ARR_NDIM(v);
                               4826           30169 :     dims = ARR_DIMS(v);
                               4827           30169 :     nitems = ArrayGetNItems(ndims, dims);
 7226 tgl                      4828 ECB             : 
                               4829                 :     /* if there are no elements, return an empty string */
 7226 tgl                      4830 GIC       30169 :     if (nitems == 0)
 4625                          4831           18246 :         return cstring_to_text_with_len("", 0);
                               4832                 : 
 7226                          4833           11923 :     element_type = ARR_ELEMTYPE(v);
 6248 neilc                    4834 CBC       11923 :     initStringInfo(&buf);
                               4835                 : 
                               4836                 :     /*
 7226 tgl                      4837 EUB             :      * We arrange to look up info about element type, including its output
                               4838                 :      * conversion proc, only once per series of calls, assuming the element
                               4839                 :      * type doesn't change underneath us.
                               4840                 :      */
 7226 tgl                      4841 GIC       11923 :     my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
                               4842           11923 :     if (my_extra == NULL)
                               4843                 :     {
 7226 tgl                      4844 CBC         695 :         fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
 6385 bruce                    4845 ECB             :                                                       sizeof(ArrayMetaState));
 7226 tgl                      4846 CBC         695 :         my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
 6351 tgl                      4847 GIC         695 :         my_extra->element_type = ~element_type;
 7226 tgl                      4848 ECB             :     }
                               4849                 : 
 7226 tgl                      4850 GIC       11923 :     if (my_extra->element_type != element_type)
                               4851                 :     {
 7188 bruce                    4852 ECB             :         /*
                               4853                 :          * Get info about element type, including its output conversion proc
                               4854                 :          */
 7226 tgl                      4855 GIC         695 :         get_type_io_data(element_type, IOFunc_output,
 7226 tgl                      4856 ECB             :                          &my_extra->typlen, &my_extra->typbyval,
                               4857                 :                          &my_extra->typalign, &my_extra->typdelim,
                               4858                 :                          &my_extra->typioparam, &my_extra->typiofunc);
 7226 tgl                      4859 CBC         695 :         fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
 7226 tgl                      4860 GIC         695 :                       fcinfo->flinfo->fn_mcxt);
 7226 tgl                      4861 CBC         695 :         my_extra->element_type = element_type;
 7226 tgl                      4862 ECB             :     }
 7226 tgl                      4863 GIC       11923 :     typlen = my_extra->typlen;
                               4864           11923 :     typbyval = my_extra->typbyval;
                               4865           11923 :     typalign = my_extra->typalign;
                               4866                 : 
 6351                          4867           11923 :     p = ARR_DATA_PTR(v);
                               4868           11923 :     bitmap = ARR_NULLBITMAP(v);
                               4869           11923 :     bitmask = 1;
                               4870                 : 
 7226                          4871           40628 :     for (i = 0; i < nitems; i++)
                               4872                 :     {
                               4873                 :         Datum       itemvalue;
                               4874                 :         char       *value;
 7226 tgl                      4875 ECB             : 
                               4876                 :         /* Get source element, checking for NULL */
 6351 tgl                      4877 CBC       28705 :         if (bitmap && (*bitmap & bitmask) == 0)
 6351 tgl                      4878 ECB             :         {
                               4879                 :             /* if null_string is NULL, we just ignore null elements */
 4625 tgl                      4880 GIC           9 :             if (null_string != NULL)
 4625 tgl                      4881 ECB             :             {
 4625 tgl                      4882 GIC           3 :                 if (printed)
 4625 tgl                      4883 CBC           3 :                     appendStringInfo(&buf, "%s%s", fldsep, null_string);
                               4884                 :                 else
 4625 tgl                      4885 LBC           0 :                     appendStringInfoString(&buf, null_string);
 4625 tgl                      4886 CBC           3 :                 printed = true;
 4625 tgl                      4887 ECB             :             }
                               4888                 :         }
                               4889                 :         else
 6351                          4890                 :         {
 6351 tgl                      4891 GIC       28696 :             itemvalue = fetch_att(p, typbyval, typlen);
                               4892                 : 
 6214                          4893           28696 :             value = OutputFunctionCall(&my_extra->proc, itemvalue);
                               4894                 : 
 6351                          4895           28696 :             if (printed)
 6248 neilc                    4896           16773 :                 appendStringInfo(&buf, "%s%s", fldsep, value);
 6351 tgl                      4897 ECB             :             else
 6248 neilc                    4898 CBC       11923 :                 appendStringInfoString(&buf, value);
 6351 tgl                      4899           28696 :             printed = true;
                               4900                 : 
 5847 tgl                      4901 GIC       28696 :             p = att_addlength_pointer(p, typlen, p);
                               4902           28696 :             p = (char *) att_align_nominal(p, typalign);
                               4903                 :         }
                               4904                 : 
                               4905                 :         /* advance bitmap pointer if any */
 6351                          4906           28705 :         if (bitmap)
 6351 tgl                      4907 ECB             :         {
 6351 tgl                      4908 GIC          54 :             bitmask <<= 1;
                               4909              54 :             if (bitmask == 0x100)
                               4910                 :             {
 6351 tgl                      4911 UIC           0 :                 bitmap++;
                               4912               0 :                 bitmask = 1;
 6351 tgl                      4913 ECB             :             }
 6351 tgl                      4914 EUB             :         }
                               4915                 :     }
 4520 peter_e                  4916 ECB             : 
 4625 tgl                      4917 CBC       11923 :     result = cstring_to_text_with_len(buf.data, buf.len);
 4625 tgl                      4918 GIC       11923 :     pfree(buf.data);
 7226 tgl                      4919 ECB             : 
 4625 tgl                      4920 GIC       11923 :     return result;
                               4921                 : }
                               4922                 : 
                               4923                 : #define HEXBASE 16
                               4924                 : /*
 2881 heikki.linnakangas       4925 EUB             :  * Convert an int32 to a string containing a base 16 (hex) representation of
 7535 bruce                    4926                 :  * the number.
                               4927                 :  */
                               4928                 : Datum
 7535 bruce                    4929 GIC       19342 : to_hex32(PG_FUNCTION_ARGS)
 7535 bruce                    4930 ECB             : {
 7051 tgl                      4931 GIC       19342 :     uint32      value = (uint32) PG_GETARG_INT32(0);
                               4932                 :     char       *ptr;
                               4933           19342 :     const char *digits = "0123456789abcdef";
                               4934                 :     char        buf[32];        /* bigger than needed, but reasonable */
 7535 bruce                    4935 ECB             : 
 7529 bruce                    4936 CBC       19342 :     ptr = buf + sizeof(buf) - 1;
 7535                          4937           19342 :     *ptr = '\0';
 7535 bruce                    4938 ECB             : 
                               4939                 :     do
                               4940                 :     {
 7535 bruce                    4941 CBC       37279 :         *--ptr = digits[value % HEXBASE];
 7535 bruce                    4942 GIC       37279 :         value /= HEXBASE;
                               4943           37279 :     } while (ptr > buf && value);
 7535 bruce                    4944 ECB             : 
 5493 tgl                      4945 GIC       19342 :     PG_RETURN_TEXT_P(cstring_to_text(ptr));
                               4946                 : }
 7535 bruce                    4947 ECB             : 
                               4948                 : /*
                               4949                 :  * Convert an int64 to a string containing a base 16 (hex) representation of
                               4950                 :  * the number.
                               4951                 :  */
                               4952                 : Datum
 7535 bruce                    4953 GIC           3 : to_hex64(PG_FUNCTION_ARGS)
                               4954                 : {
 7051 tgl                      4955               3 :     uint64      value = (uint64) PG_GETARG_INT64(0);
                               4956                 :     char       *ptr;
 7051 tgl                      4957 CBC           3 :     const char *digits = "0123456789abcdef";
                               4958                 :     char        buf[32];        /* bigger than needed, but reasonable */
                               4959                 : 
 7529 bruce                    4960 GIC           3 :     ptr = buf + sizeof(buf) - 1;
 7535                          4961               3 :     *ptr = '\0';
                               4962                 : 
                               4963                 :     do
 7535 bruce                    4964 ECB             :     {
 7535 bruce                    4965 GIC          24 :         *--ptr = digits[value % HEXBASE];
 7535 bruce                    4966 CBC          24 :         value /= HEXBASE;
 7535 bruce                    4967 GIC          24 :     } while (ptr > buf && value);
 7535 bruce                    4968 ECB             : 
 5493 tgl                      4969 GIC           3 :     PG_RETURN_TEXT_P(cstring_to_text(ptr));
                               4970                 : }
 7429 bruce                    4971 ECB             : 
                               4972                 : /*
                               4973                 :  * Return the size of a datum, possibly compressed
 6459 tgl                      4974                 :  *
                               4975                 :  * Works on any data type
 6486 bruce                    4976                 :  */
                               4977                 : Datum
 6486 bruce                    4978 CBC          61 : pg_column_size(PG_FUNCTION_ARGS)
                               4979                 : {
 6459 tgl                      4980 GIC          61 :     Datum       value = PG_GETARG_DATUM(0);
                               4981                 :     int32       result;
                               4982                 :     int         typlen;
                               4983                 : 
                               4984                 :     /* On first call, get the input type's typlen, and save at *fn_extra */
                               4985              61 :     if (fcinfo->flinfo->fn_extra == NULL)
                               4986                 :     {
                               4987                 :         /* Lookup the datatype of the supplied argument */
 6385 bruce                    4988 CBC          61 :         Oid         argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
                               4989                 : 
 6459 tgl                      4990 GIC          61 :         typlen = get_typlen(argtypeid);
                               4991              61 :         if (typlen == 0)        /* should not happen */
 6485 bruce                    4992 UIC           0 :             elog(ERROR, "cache lookup failed for type %u", argtypeid);
                               4993                 : 
 6486 bruce                    4994 GIC          61 :         fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
                               4995                 :                                                       sizeof(int));
 6459 tgl                      4996              61 :         *((int *) fcinfo->flinfo->fn_extra) = typlen;
 6486 bruce                    4997 ECB             :     }
                               4998                 :     else
 6459 tgl                      4999 LBC           0 :         typlen = *((int *) fcinfo->flinfo->fn_extra);
                               5000                 : 
 6459 tgl                      5001 GIC          61 :     if (typlen == -1)
                               5002                 :     {
                               5003                 :         /* varlena type, possibly toasted */
                               5004              61 :         result = toast_datum_size(value);
 6459 tgl                      5005 ECB             :     }
 6459 tgl                      5006 LBC           0 :     else if (typlen == -2)
 6459 tgl                      5007 ECB             :     {
                               5008                 :         /* cstring */
 6459 tgl                      5009 LBC           0 :         result = strlen(DatumGetCString(value)) + 1;
                               5010                 :     }
                               5011                 :     else
 6486 bruce                    5012 ECB             :     {
                               5013                 :         /* ordinary fixed-width type */
 6459 tgl                      5014 UIC           0 :         result = typlen;
 6486 bruce                    5015 ECB             :     }
 6459 tgl                      5016                 : 
 6459 tgl                      5017 CBC          61 :     PG_RETURN_INT32(result);
                               5018                 : }
 4815 itagaki.takahiro         5019 ECB             : 
  751 rhaas                    5020                 : /*
                               5021                 :  * Return the compression method stored in the compressed attribute.  Return
                               5022                 :  * NULL for non varlena type or uncompressed data.
                               5023                 :  */
                               5024                 : Datum
  751 rhaas                    5025 GIC          81 : pg_column_compression(PG_FUNCTION_ARGS)
  751 rhaas                    5026 ECB             : {
                               5027                 :     int         typlen;
                               5028                 :     char       *result;
                               5029                 :     ToastCompressionId cmid;
                               5030                 : 
                               5031                 :     /* On first call, get the input type's typlen, and save at *fn_extra */
  751 rhaas                    5032 GIC          81 :     if (fcinfo->flinfo->fn_extra == NULL)
  751 rhaas                    5033 ECB             :     {
                               5034                 :         /* Lookup the datatype of the supplied argument */
  751 rhaas                    5035 CBC          54 :         Oid         argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
                               5036                 : 
  751 rhaas                    5037 GIC          54 :         typlen = get_typlen(argtypeid);
  751 rhaas                    5038 CBC          54 :         if (typlen == 0)        /* should not happen */
  751 rhaas                    5039 UIC           0 :             elog(ERROR, "cache lookup failed for type %u", argtypeid);
                               5040                 : 
  751 rhaas                    5041 GIC          54 :         fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
  751 rhaas                    5042 ECB             :                                                       sizeof(int));
  751 rhaas                    5043 GIC          54 :         *((int *) fcinfo->flinfo->fn_extra) = typlen;
                               5044                 :     }
                               5045                 :     else
                               5046              27 :         typlen = *((int *) fcinfo->flinfo->fn_extra);
                               5047                 : 
                               5048              81 :     if (typlen != -1)
  751 rhaas                    5049 UIC           0 :         PG_RETURN_NULL();
                               5050                 : 
                               5051                 :     /* get the compression method id stored in the compressed varlena */
  751 rhaas                    5052 CBC          81 :     cmid = toast_get_compression_id((struct varlena *)
  751 rhaas                    5053 GIC          81 :                                     DatumGetPointer(PG_GETARG_DATUM(0)));
                               5054              81 :     if (cmid == TOAST_INVALID_COMPRESSION_ID)
                               5055               3 :         PG_RETURN_NULL();
                               5056                 : 
                               5057                 :     /* convert compression method id to compression method name */
  751 rhaas                    5058 CBC          78 :     switch (cmid)
  751 rhaas                    5059 ECB             :     {
  751 rhaas                    5060 GIC          33 :         case TOAST_PGLZ_COMPRESSION_ID:
  751 rhaas                    5061 CBC          33 :             result = "pglz";
  751 rhaas                    5062 GIC          33 :             break;
                               5063              45 :         case TOAST_LZ4_COMPRESSION_ID:
                               5064              45 :             result = "lz4";
                               5065              45 :             break;
  751 rhaas                    5066 UIC           0 :         default:
  751 rhaas                    5067 LBC           0 :             elog(ERROR, "invalid compression method id %d", cmid);
  751 rhaas                    5068 ECB             :     }
  751 rhaas                    5069 EUB             : 
  751 rhaas                    5070 GIC          78 :     PG_RETURN_TEXT_P(cstring_to_text(result));
  751 rhaas                    5071 ECB             : }
                               5072                 : 
                               5073                 : /*
                               5074                 :  * string_agg - Concatenates values and returns string.
 4815 itagaki.takahiro         5075                 :  *
                               5076                 :  * Syntax: string_agg(value text, delimiter text) RETURNS text
                               5077                 :  *
                               5078                 :  * Note: Any NULL values are ignored. The first-call delimiter isn't
                               5079                 :  * actually used at all, and on subsequent calls the delimiter precedes
                               5080                 :  * the associated value.
                               5081                 :  */
                               5082                 : 
                               5083                 : /* subroutine to initialize state */
                               5084                 : static StringInfo
 4808 tgl                      5085 GIC        1026 : makeStringAggState(FunctionCallInfo fcinfo)
                               5086                 : {
                               5087                 :     StringInfo  state;
                               5088                 :     MemoryContext aggcontext;
                               5089                 :     MemoryContext oldcontext;
 4815 itagaki.takahiro         5090 ECB             : 
 4808 tgl                      5091 GIC        1026 :     if (!AggCheckCallContext(fcinfo, &aggcontext))
                               5092                 :     {
                               5093                 :         /* cannot be called directly because of internal-type argument */
 4815 itagaki.takahiro         5094 UIC           0 :         elog(ERROR, "string_agg_transfn called in non-aggregate context");
                               5095                 :     }
 4815 itagaki.takahiro         5096 ECB             : 
                               5097                 :     /*
                               5098                 :      * Create state in aggregate context.  It'll stay there across subsequent
                               5099                 :      * calls.
                               5100                 :      */
 4815 itagaki.takahiro         5101 GIC        1026 :     oldcontext = MemoryContextSwitchTo(aggcontext);
                               5102            1026 :     state = makeStringInfo();
                               5103            1026 :     MemoryContextSwitchTo(oldcontext);
 4815 itagaki.takahiro         5104 ECB             : 
 4815 itagaki.takahiro         5105 GIC        1026 :     return state;
                               5106                 : }
                               5107                 : 
                               5108                 : Datum
 4815 itagaki.takahiro         5109 CBC      420976 : string_agg_transfn(PG_FUNCTION_ARGS)
                               5110                 : {
                               5111                 :     StringInfo  state;
 4815 itagaki.takahiro         5112 ECB             : 
 4815 itagaki.takahiro         5113 CBC      420976 :     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
                               5114                 : 
                               5115                 :     /* Append the value unless null, preceding it with the delimiter. */
 4815 itagaki.takahiro         5116 GIC      420976 :     if (!PG_ARGISNULL(1))
                               5117                 :     {
   76 drowley                  5118 GNC      413452 :         text       *value = PG_GETARG_TEXT_PP(1);
                               5119          413452 :         bool        isfirst = false;
                               5120                 : 
                               5121                 :         /*
                               5122                 :          * You might think we can just throw away the first delimiter, however
                               5123                 :          * we must keep it as we may be a parallel worker doing partial
                               5124                 :          * aggregation building a state to send to the main process.  We need
                               5125                 :          * to keep the delimiter of every aggregation so that the combine
                               5126                 :          * function can properly join up the strings of two separately
                               5127                 :          * partially aggregated results.  The first delimiter is only stripped
                               5128                 :          * off in the final function.  To know how much to strip off the front
                               5129                 :          * of the string, we store the length of the first delimiter in the
                               5130                 :          * StringInfo's cursor field, which we don't otherwise need here.
                               5131                 :          */
 4815 itagaki.takahiro         5132          413452 :         if (state == NULL)
                               5133                 :         {
 4808 tgl                      5134             773 :             state = makeStringAggState(fcinfo);
   76 drowley                  5135             773 :             isfirst = true;
                               5136                 :         }
                               5137                 : 
                               5138          413452 :         if (!PG_ARGISNULL(2))
                               5139                 :         {
                               5140          413452 :             text       *delim = PG_GETARG_TEXT_PP(2);
                               5141                 : 
                               5142          413452 :             appendStringInfoText(state, delim);
                               5143          413452 :             if (isfirst)
                               5144             773 :                 state->cursor = VARSIZE_ANY_EXHDR(delim);
                               5145                 :         }
                               5146                 : 
                               5147          413452 :         appendStringInfoText(state, value);
 4815 itagaki.takahiro         5148 ECB             :     }
                               5149                 : 
                               5150                 :     /*
                               5151                 :      * The transition type for string_agg() is declared to be "internal",
                               5152                 :      * which is a pass-by-value type the same size as a pointer.
                               5153                 :      */
   76 drowley                  5154 GNC      420976 :     if (state)
                               5155          420931 :         PG_RETURN_POINTER(state);
                               5156              45 :     PG_RETURN_NULL();
                               5157                 : }
                               5158                 : 
                               5159                 : /*
                               5160                 :  * string_agg_combine
                               5161                 :  *      Aggregate combine function for string_agg(text) and string_agg(bytea)
                               5162                 :  */
                               5163                 : Datum
                               5164             100 : string_agg_combine(PG_FUNCTION_ARGS)
                               5165                 : {
                               5166                 :     StringInfo  state1;
                               5167                 :     StringInfo  state2;
                               5168                 :     MemoryContext agg_context;
                               5169                 : 
                               5170             100 :     if (!AggCheckCallContext(fcinfo, &agg_context))
   76 drowley                  5171 UNC           0 :         elog(ERROR, "aggregate function called in non-aggregate context");
                               5172                 : 
   76 drowley                  5173 GNC         100 :     state1 = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
                               5174             100 :     state2 = PG_ARGISNULL(1) ? NULL : (StringInfo) PG_GETARG_POINTER(1);
                               5175                 : 
                               5176             100 :     if (state2 == NULL)
                               5177                 :     {
                               5178                 :         /*
                               5179                 :          * NULL state2 is easy, just return state1, which we know is already
                               5180                 :          * in the agg_context
                               5181                 :          */
   76 drowley                  5182 UNC           0 :         if (state1 == NULL)
                               5183               0 :             PG_RETURN_NULL();
                               5184               0 :         PG_RETURN_POINTER(state1);
                               5185                 :     }
                               5186                 : 
   76 drowley                  5187 GNC         100 :     if (state1 == NULL)
                               5188                 :     {
                               5189                 :         /* We must copy state2's data into the agg_context */
                               5190                 :         MemoryContext old_context;
                               5191                 : 
                               5192              60 :         old_context = MemoryContextSwitchTo(agg_context);
                               5193              60 :         state1 = makeStringAggState(fcinfo);
                               5194              60 :         appendBinaryStringInfo(state1, state2->data, state2->len);
                               5195              60 :         state1->cursor = state2->cursor;
                               5196              60 :         MemoryContextSwitchTo(old_context);
                               5197                 :     }
                               5198              40 :     else if (state2->len > 0)
                               5199                 :     {
                               5200                 :         /* Combine ... state1->cursor does not change in this case */
                               5201              40 :         appendBinaryStringInfo(state1, state2->data, state2->len);
                               5202                 :     }
                               5203                 : 
                               5204             100 :     PG_RETURN_POINTER(state1);
                               5205                 : }
                               5206                 : 
                               5207                 : /*
                               5208                 :  * string_agg_serialize
                               5209                 :  *      Aggregate serialize function for string_agg(text) and string_agg(bytea)
                               5210                 :  *
                               5211                 :  * This is strict, so we need not handle NULL input
                               5212                 :  */
                               5213                 : Datum
                               5214             100 : string_agg_serialize(PG_FUNCTION_ARGS)
                               5215                 : {
                               5216                 :     StringInfo  state;
                               5217                 :     StringInfoData buf;
                               5218                 :     bytea      *result;
                               5219                 : 
                               5220                 :     /* cannot be called directly because of internal-type argument */
                               5221             100 :     Assert(AggCheckCallContext(fcinfo, NULL));
                               5222                 : 
                               5223             100 :     state = (StringInfo) PG_GETARG_POINTER(0);
                               5224                 : 
                               5225             100 :     pq_begintypsend(&buf);
                               5226                 : 
                               5227                 :     /* cursor */
                               5228             100 :     pq_sendint(&buf, state->cursor, 4);
                               5229                 : 
                               5230                 :     /* data */
                               5231             100 :     pq_sendbytes(&buf, state->data, state->len);
                               5232                 : 
                               5233             100 :     result = pq_endtypsend(&buf);
                               5234                 : 
                               5235             100 :     PG_RETURN_BYTEA_P(result);
                               5236                 : }
                               5237                 : 
                               5238                 : /*
                               5239                 :  * string_agg_deserialize
                               5240                 :  *      Aggregate deserial function for string_agg(text) and string_agg(bytea)
                               5241                 :  *
                               5242                 :  * This is strict, so we need not handle NULL input
                               5243                 :  */
                               5244                 : Datum
                               5245             100 : string_agg_deserialize(PG_FUNCTION_ARGS)
                               5246                 : {
                               5247                 :     bytea      *sstate;
                               5248                 :     StringInfo  result;
                               5249                 :     StringInfoData buf;
                               5250                 :     char       *data;
                               5251                 :     int         datalen;
                               5252                 : 
                               5253                 :     /* cannot be called directly because of internal-type argument */
                               5254             100 :     Assert(AggCheckCallContext(fcinfo, NULL));
                               5255                 : 
                               5256             100 :     sstate = PG_GETARG_BYTEA_PP(0);
                               5257                 : 
                               5258                 :     /*
                               5259                 :      * Copy the bytea into a StringInfo so that we can "receive" it using the
                               5260                 :      * standard recv-function infrastructure.
                               5261                 :      */
                               5262             100 :     initStringInfo(&buf);
                               5263             200 :     appendBinaryStringInfo(&buf,
                               5264             200 :                            VARDATA_ANY(sstate), VARSIZE_ANY_EXHDR(sstate));
                               5265                 : 
                               5266             100 :     result = makeStringAggState(fcinfo);
                               5267                 : 
                               5268                 :     /* cursor */
                               5269             100 :     result->cursor = pq_getmsgint(&buf, 4);
                               5270                 : 
                               5271                 :     /* data */
                               5272             100 :     datalen = VARSIZE_ANY_EXHDR(sstate) - 4;
                               5273             100 :     data = (char *) pq_getmsgbytes(&buf, datalen);
                               5274             100 :     appendBinaryStringInfo(result, data, datalen);
                               5275                 : 
                               5276             100 :     pq_getmsgend(&buf);
                               5277             100 :     pfree(buf.data);
                               5278                 : 
                               5279             100 :     PG_RETURN_POINTER(result);
                               5280                 : }
                               5281                 : 
                               5282                 : Datum
 4815 itagaki.takahiro         5283 CBC         789 : string_agg_finalfn(PG_FUNCTION_ARGS)
                               5284                 : {
                               5285                 :     StringInfo  state;
 4815 itagaki.takahiro         5286 ECB             : 
                               5287                 :     /* cannot be called directly because of internal-type argument */
 4808 tgl                      5288 CBC         789 :     Assert(AggCheckCallContext(fcinfo, NULL));
                               5289                 : 
                               5290             789 :     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
                               5291                 : 
 4815 itagaki.takahiro         5292             789 :     if (state != NULL)
                               5293                 :     {
                               5294                 :         /* As per comment in transfn, strip data before the cursor position */
   76 drowley                  5295 GNC         753 :         PG_RETURN_TEXT_P(cstring_to_text_with_len(&state->data[state->cursor],
                               5296                 :                                                   state->len - state->cursor));
                               5297                 :     }
 4815 itagaki.takahiro         5298 ECB             :     else
 4815 itagaki.takahiro         5299 GIC          36 :         PG_RETURN_NULL();
                               5300                 : }
 4611 itagaki.takahiro         5301 ECB             : 
 2028 tgl                      5302                 : /*
                               5303                 :  * Prepare cache with fmgr info for the output functions of the datatypes of
                               5304                 :  * the arguments of a concat-like function, beginning with argument "argidx".
                               5305                 :  * (Arguments before that will have corresponding slots in the resulting
                               5306                 :  * FmgrInfo array, but we don't fill those slots.)
                               5307                 :  */
                               5308                 : static FmgrInfo *
 2028 tgl                      5309 GIC          20 : build_concat_foutcache(FunctionCallInfo fcinfo, int argidx)
                               5310                 : {
                               5311                 :     FmgrInfo   *foutcache;
 2028 tgl                      5312 ECB             :     int         i;
                               5313                 : 
                               5314                 :     /* We keep the info in fn_mcxt so it survives across calls */
 2028 tgl                      5315 CBC          20 :     foutcache = (FmgrInfo *) MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
 2028 tgl                      5316 GIC          20 :                                                 PG_NARGS() * sizeof(FmgrInfo));
                               5317                 : 
                               5318              98 :     for (i = argidx; i < PG_NARGS(); i++)
                               5319                 :     {
                               5320                 :         Oid         valtype;
                               5321                 :         Oid         typOutput;
 2028 tgl                      5322 ECB             :         bool        typIsVarlena;
                               5323                 : 
 2028 tgl                      5324 GIC          78 :         valtype = get_fn_expr_argtype(fcinfo->flinfo, i);
                               5325              78 :         if (!OidIsValid(valtype))
 2028 tgl                      5326 LBC           0 :             elog(ERROR, "could not determine data type of concat() input");
 2028 tgl                      5327 ECB             : 
 2028 tgl                      5328 CBC          78 :         getTypeOutputInfo(valtype, &typOutput, &typIsVarlena);
                               5329              78 :         fmgr_info_cxt(typOutput, &foutcache[i], fcinfo->flinfo->fn_mcxt);
                               5330                 :     }
                               5331                 : 
 2028 tgl                      5332 GIC          20 :     fcinfo->flinfo->fn_extra = foutcache;
                               5333                 : 
                               5334              20 :     return foutcache;
                               5335                 : }
                               5336                 : 
 4241 tgl                      5337 ECB             : /*
                               5338                 :  * Implementation of both concat() and concat_ws().
                               5339                 :  *
                               5340                 :  * sepstr is the separator string to place between values.
                               5341                 :  * argidx identifies the first argument to concatenate (counting from zero);
                               5342                 :  * note that this must be constant across any one series of calls.
 2028                          5343                 :  *
 3726                          5344                 :  * Returns NULL if result should be NULL, else text value.
 4241                          5345                 :  */
                               5346                 : static text *
 3726 tgl                      5347 CBC          36 : concat_internal(const char *sepstr, int argidx,
 4241 tgl                      5348 ECB             :                 FunctionCallInfo fcinfo)
 4611 itagaki.takahiro         5349                 : {
 4382 bruce                    5350                 :     text       *result;
                               5351                 :     StringInfoData str;
                               5352                 :     FmgrInfo   *foutcache;
 4241 tgl                      5353 GIC          36 :     bool        first_arg = true;
                               5354                 :     int         i;
                               5355                 : 
                               5356                 :     /*
                               5357                 :      * concat(VARIADIC some-array) is essentially equivalent to
 3726 tgl                      5358 ECB             :      * array_to_text(), ie concat the array elements with the given separator.
                               5359                 :      * So we just pass the case off to that code.
                               5360                 :      */
 3726 tgl                      5361 GIC          36 :     if (get_fn_expr_variadic(fcinfo->flinfo))
 3726 tgl                      5362 ECB             :     {
                               5363                 :         ArrayType  *arr;
                               5364                 : 
                               5365                 :         /* Should have just the one argument */
 3726 tgl                      5366 CBC          15 :         Assert(argidx == PG_NARGS() - 1);
                               5367                 : 
                               5368                 :         /* concat(VARIADIC NULL) is defined as NULL */
                               5369              15 :         if (PG_ARGISNULL(argidx))
                               5370               6 :             return NULL;
 3726 tgl                      5371 ECB             : 
                               5372                 :         /*
                               5373                 :          * Non-null argument had better be an array.  We assume that any call
 3293                          5374                 :          * context that could let get_fn_expr_variadic return true will have
                               5375                 :          * checked that a VARIADIC-labeled parameter actually is an array.  So
                               5376                 :          * it should be okay to just Assert that it's an array rather than
                               5377                 :          * doing a full-fledged error check.
                               5378                 :          */
 3293 tgl                      5379 GIC           9 :         Assert(OidIsValid(get_base_element_type(get_fn_expr_argtype(fcinfo->flinfo, argidx))));
                               5380                 : 
                               5381                 :         /* OK, safe to fetch the array value */
 3726 tgl                      5382 CBC           9 :         arr = PG_GETARG_ARRAYTYPE_P(argidx);
                               5383                 : 
 3726 tgl                      5384 ECB             :         /*
                               5385                 :          * And serialize the array.  We tell array_to_text to ignore null
                               5386                 :          * elements, which matches the behavior of the loop below.
                               5387                 :          */
 3726 tgl                      5388 GIC           9 :         return array_to_text_internal(fcinfo, arr, sepstr, NULL);
                               5389                 :     }
 3726 tgl                      5390 ECB             : 
                               5391                 :     /* Normal case without explicit VARIADIC marker */
 4611 itagaki.takahiro         5392 GIC          21 :     initStringInfo(&str);
 4611 itagaki.takahiro         5393 ECB             : 
 2028 tgl                      5394                 :     /* Get output function info, building it if first time through */
 2028 tgl                      5395 GIC          21 :     foutcache = (FmgrInfo *) fcinfo->flinfo->fn_extra;
 2028 tgl                      5396 CBC          21 :     if (foutcache == NULL)
 2028 tgl                      5397 GIC          20 :         foutcache = build_concat_foutcache(fcinfo, argidx);
                               5398                 : 
 4611 itagaki.takahiro         5399             102 :     for (i = argidx; i < PG_NARGS(); i++)
                               5400                 :     {
                               5401              81 :         if (!PG_ARGISNULL(i))
                               5402                 :         {
 4241 tgl                      5403 CBC          75 :             Datum       value = PG_GETARG_DATUM(i);
                               5404                 : 
 4241 tgl                      5405 ECB             :             /* add separator if appropriate */
 4241 tgl                      5406 CBC          75 :             if (first_arg)
                               5407              21 :                 first_arg = false;
 4241 tgl                      5408 ECB             :             else
 3726 tgl                      5409 GIC          54 :                 appendStringInfoString(&str, sepstr);
                               5410                 : 
                               5411                 :             /* call the appropriate type output function, append the result */
 4611 itagaki.takahiro         5412 CBC          75 :             appendStringInfoString(&str,
 2028 tgl                      5413              75 :                                    OutputFunctionCall(&foutcache[i], value));
 4611 itagaki.takahiro         5414 ECB             :         }
                               5415                 :     }
                               5416                 : 
 4611 itagaki.takahiro         5417 GIC          21 :     result = cstring_to_text_with_len(str.data, str.len);
                               5418              21 :     pfree(str.data);
 4611 itagaki.takahiro         5419 ECB             : 
 4611 itagaki.takahiro         5420 GIC          21 :     return result;
                               5421                 : }
                               5422                 : 
 4611 itagaki.takahiro         5423 ECB             : /*
                               5424                 :  * Concatenate all arguments. NULL arguments are ignored.
                               5425                 :  */
                               5426                 : Datum
 4611 itagaki.takahiro         5427 GIC          18 : text_concat(PG_FUNCTION_ARGS)
                               5428                 : {
                               5429                 :     text       *result;
                               5430                 : 
 3726 tgl                      5431              18 :     result = concat_internal("", 0, fcinfo);
 3726 tgl                      5432 GBC          18 :     if (result == NULL)
                               5433               3 :         PG_RETURN_NULL();
 3726 tgl                      5434 GIC          15 :     PG_RETURN_TEXT_P(result);
                               5435                 : }
 4611 itagaki.takahiro         5436 ECB             : 
                               5437                 : /*
                               5438                 :  * Concatenate all but first argument value with separators. The first
                               5439                 :  * parameter is used as the separator. NULL arguments are ignored.
                               5440                 :  */
                               5441                 : Datum
 4611 itagaki.takahiro         5442 GIC          21 : text_concat_ws(PG_FUNCTION_ARGS)
                               5443                 : {
                               5444                 :     char       *sep;
                               5445                 :     text       *result;
                               5446                 : 
                               5447                 :     /* return NULL when separator is NULL */
                               5448              21 :     if (PG_ARGISNULL(0))
                               5449               3 :         PG_RETURN_NULL();
 3726 tgl                      5450              18 :     sep = text_to_cstring(PG_GETARG_TEXT_PP(0));
                               5451                 : 
                               5452              18 :     result = concat_internal(sep, 1, fcinfo);
                               5453              18 :     if (result == NULL)
                               5454               3 :         PG_RETURN_NULL();
                               5455              15 :     PG_RETURN_TEXT_P(result);
                               5456                 : }
                               5457                 : 
 4611 itagaki.takahiro         5458 ECB             : /*
                               5459                 :  * Return first n characters in the string. When n is negative,
                               5460                 :  * return all but last |n| characters.
                               5461                 :  */
                               5462                 : Datum
 4611 itagaki.takahiro         5463 GIC         942 : text_left(PG_FUNCTION_ARGS)
                               5464                 : {
 1418 tgl                      5465             942 :     int         n = PG_GETARG_INT32(1);
                               5466                 : 
 4611 itagaki.takahiro         5467             942 :     if (n < 0)
                               5468                 :     {
 1468 sfrost                   5469 CBC          15 :         text       *str = PG_GETARG_TEXT_PP(0);
                               5470              15 :         const char *p = VARDATA_ANY(str);
                               5471              15 :         int         len = VARSIZE_ANY_EXHDR(str);
 1468 sfrost                   5472 ECB             :         int         rlen;
 4611 itagaki.takahiro         5473                 : 
 1468 sfrost                   5474 GIC          15 :         n = pg_mbstrlen_with_len(p, len) + n;
                               5475              15 :         rlen = pg_mbcharcliplen(p, len, n);
                               5476              15 :         PG_RETURN_TEXT_P(cstring_to_text_with_len(p, rlen));
                               5477                 :     }
 1468 sfrost                   5478 ECB             :     else
 1468 sfrost                   5479 CBC         927 :         PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0), 1, n, false));
                               5480                 : }
                               5481                 : 
 4611 itagaki.takahiro         5482 ECB             : /*
                               5483                 :  * Return last n characters in the string. When n is negative,
                               5484                 :  * return all but first |n| characters.
                               5485                 :  */
                               5486                 : Datum
 4611 itagaki.takahiro         5487 GIC          33 : text_right(PG_FUNCTION_ARGS)
                               5488                 : {
                               5489              33 :     text       *str = PG_GETARG_TEXT_PP(0);
                               5490              33 :     const char *p = VARDATA_ANY(str);
 4611 itagaki.takahiro         5491 CBC          33 :     int         len = VARSIZE_ANY_EXHDR(str);
 4611 itagaki.takahiro         5492 GIC          33 :     int         n = PG_GETARG_INT32(1);
                               5493                 :     int         off;
 4611 itagaki.takahiro         5494 ECB             : 
 4611 itagaki.takahiro         5495 CBC          33 :     if (n < 0)
 4611 itagaki.takahiro         5496 GIC          15 :         n = -n;
                               5497                 :     else
                               5498              18 :         n = pg_mbstrlen_with_len(p, len) - n;
                               5499              33 :     off = pg_mbcharcliplen(p, len, n);
                               5500                 : 
                               5501              33 :     PG_RETURN_TEXT_P(cstring_to_text_with_len(p + off, len - off));
                               5502                 : }
                               5503                 : 
                               5504                 : /*
 4611 itagaki.takahiro         5505 ECB             :  * Return reversed string
                               5506                 :  */
                               5507                 : Datum
 4611 itagaki.takahiro         5508 CBC           3 : text_reverse(PG_FUNCTION_ARGS)
                               5509                 : {
 4382 bruce                    5510 GIC           3 :     text       *str = PG_GETARG_TEXT_PP(0);
 4382 bruce                    5511 CBC           3 :     const char *p = VARDATA_ANY(str);
                               5512               3 :     int         len = VARSIZE_ANY_EXHDR(str);
 4382 bruce                    5513 GIC           3 :     const char *endp = p + len;
                               5514                 :     text       *result;
                               5515                 :     char       *dst;
 4611 itagaki.takahiro         5516 ECB             : 
 4611 itagaki.takahiro         5517 GIC           3 :     result = palloc(len + VARHDRSZ);
 4382 bruce                    5518               3 :     dst = (char *) VARDATA(result) + len;
 4611 itagaki.takahiro         5519               3 :     SET_VARSIZE(result, len + VARHDRSZ);
 4611 itagaki.takahiro         5520 ECB             : 
 4611 itagaki.takahiro         5521 CBC           3 :     if (pg_database_encoding_max_length() > 1)
                               5522                 :     {
                               5523                 :         /* multibyte version */
 4611 itagaki.takahiro         5524 GIC          18 :         while (p < endp)
                               5525                 :         {
 4382 bruce                    5526 ECB             :             int         sz;
 4611 itagaki.takahiro         5527                 : 
 4611 itagaki.takahiro         5528 GIC          15 :             sz = pg_mblen(p);
                               5529              15 :             dst -= sz;
                               5530              15 :             memcpy(dst, p, sz);
 4611 itagaki.takahiro         5531 CBC          15 :             p += sz;
 4611 itagaki.takahiro         5532 ECB             :         }
                               5533                 :     }
                               5534                 :     else
                               5535                 :     {
                               5536                 :         /* single byte version */
 4611 itagaki.takahiro         5537 UIC           0 :         while (p < endp)
 4611 itagaki.takahiro         5538 LBC           0 :             *(--dst) = *p++;
                               5539                 :     }
                               5540                 : 
 4611 itagaki.takahiro         5541 GIC           3 :     PG_RETURN_TEXT_P(result);
                               5542                 : }
                               5543                 : 
                               5544                 : 
                               5545                 : /*
                               5546                 :  * Support macros for text_format()
                               5547                 :  */
                               5548                 : #define TEXT_FORMAT_FLAG_MINUS  0x0001  /* is minus flag present? */
                               5549                 : 
                               5550                 : #define ADVANCE_PARSE_POINTER(ptr,end_ptr) \
                               5551                 :     do { \
 3678 tgl                      5552 ECB             :         if (++(ptr) >= (end_ptr)) \
                               5553                 :             ereport(ERROR, \
                               5554                 :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
 2614 teodor                   5555                 :                      errmsg("unterminated format() type specifier"), \
                               5556                 :                      errhint("For a single \"%%\" use \"%%%%\"."))); \
                               5557                 :     } while (0)
 3678 tgl                      5558                 : 
                               5559                 : /*
                               5560                 :  * Returns a formatted string
 4523 rhaas                    5561                 :  */
                               5562                 : Datum
 4523 rhaas                    5563 CBC       12534 : text_format(PG_FUNCTION_ARGS)
 4523 rhaas                    5564 ECB             : {
                               5565                 :     text       *fmt;
                               5566                 :     StringInfoData str;
                               5567                 :     const char *cp;
                               5568                 :     const char *start_ptr;
                               5569                 :     const char *end_ptr;
                               5570                 :     text       *result;
                               5571                 :     int         arg;
                               5572                 :     bool        funcvariadic;
                               5573                 :     int         nargs;
 3726 tgl                      5574 GIC       12534 :     Datum      *elements = NULL;
                               5575           12534 :     bool       *nulls = NULL;
                               5576           12534 :     Oid         element_type = InvalidOid;
                               5577           12534 :     Oid         prev_type = InvalidOid;
 3678                          5578           12534 :     Oid         prev_width_type = InvalidOid;
                               5579                 :     FmgrInfo    typoutputfinfo;
 3678 tgl                      5580 ECB             :     FmgrInfo    typoutputinfo_width;
 4523 rhaas                    5581                 : 
                               5582                 :     /* When format string is null, immediately return null */
 4523 rhaas                    5583 GIC       12534 :     if (PG_ARGISNULL(0))
                               5584               3 :         PG_RETURN_NULL();
                               5585                 : 
                               5586                 :     /* If argument is marked VARIADIC, expand array into elements */
 3726 tgl                      5587           12531 :     if (get_fn_expr_variadic(fcinfo->flinfo))
 3726 tgl                      5588 ECB             :     {
                               5589                 :         ArrayType  *arr;
                               5590                 :         int16       elmlen;
                               5591                 :         bool        elmbyval;
                               5592                 :         char        elmalign;
                               5593                 :         int         nitems;
 3726 tgl                      5594 EUB             : 
                               5595                 :         /* Should have just the one argument */
 3726 tgl                      5596 GIC          24 :         Assert(PG_NARGS() == 2);
                               5597                 : 
                               5598                 :         /* If argument is NULL, we treat it as zero-length array */
 3726 tgl                      5599 CBC          24 :         if (PG_ARGISNULL(1))
 3726 tgl                      5600 GIC           3 :             nitems = 0;
 3726 tgl                      5601 ECB             :         else
                               5602                 :         {
                               5603                 :             /*
                               5604                 :              * Non-null argument had better be an array.  We assume that any
                               5605                 :              * call context that could let get_fn_expr_variadic return true
                               5606                 :              * will have checked that a VARIADIC-labeled parameter actually is
 3293 tgl                      5607 EUB             :              * an array.  So it should be okay to just Assert that it's an
                               5608                 :              * array rather than doing a full-fledged error check.
 3726                          5609                 :              */
 3293 tgl                      5610 GIC          21 :             Assert(OidIsValid(get_base_element_type(get_fn_expr_argtype(fcinfo->flinfo, 1))));
 3726 tgl                      5611 ECB             : 
 3293 tgl                      5612 EUB             :             /* OK, safe to fetch the array value */
 3726 tgl                      5613 GIC          21 :             arr = PG_GETARG_ARRAYTYPE_P(1);
 3726 tgl                      5614 ECB             : 
                               5615                 :             /* Get info about array element type */
 3726 tgl                      5616 GIC          21 :             element_type = ARR_ELEMTYPE(arr);
 3726 tgl                      5617 CBC          21 :             get_typlenbyvalalign(element_type,
 3726 tgl                      5618 ECB             :                                  &elmlen, &elmbyval, &elmalign);
                               5619                 : 
                               5620                 :             /* Extract all array elements */
 3726 tgl                      5621 GBC          21 :             deconstruct_array(arr, element_type, elmlen, elmbyval, elmalign,
 3726 tgl                      5622 EUB             :                               &elements, &nulls, &nitems);
                               5623                 :         }
                               5624                 : 
 3726 tgl                      5625 GIC          24 :         nargs = nitems + 1;
                               5626              24 :         funcvariadic = true;
                               5627                 :     }
 3726 tgl                      5628 EUB             :     else
                               5629                 :     {
                               5630                 :         /* Non-variadic case, we'll process the arguments individually */
 3726 tgl                      5631 GIC       12507 :         nargs = PG_NARGS();
                               5632           12507 :         funcvariadic = false;
 3726 tgl                      5633 EUB             :     }
                               5634                 : 
 4523 rhaas                    5635                 :     /* Setup for main loop. */
 4523 rhaas                    5636 GIC       12531 :     fmt = PG_GETARG_TEXT_PP(0);
                               5637           12531 :     start_ptr = VARDATA_ANY(fmt);
 4523 rhaas                    5638 GBC       12531 :     end_ptr = start_ptr + VARSIZE_ANY_EXHDR(fmt);
 4523 rhaas                    5639 GIC       12531 :     initStringInfo(&str);
 3678 tgl                      5640           12531 :     arg = 1;                    /* next argument position to print */
 4523 rhaas                    5641 EUB             : 
                               5642                 :     /* Scan format string, looking for conversion specifiers. */
 4523 rhaas                    5643 GBC      363849 :     for (cp = start_ptr; cp < end_ptr; cp++)
                               5644                 :     {
                               5645                 :         int         argpos;
                               5646                 :         int         widthpos;
                               5647                 :         int         flags;
 3678 tgl                      5648 ECB             :         int         width;
 4382 bruce                    5649                 :         Datum       value;
                               5650                 :         bool        isNull;
                               5651                 :         Oid         typid;
                               5652                 : 
                               5653                 :         /*
                               5654                 :          * If it's not the start of a conversion specifier, just copy it to
                               5655                 :          * the output buffer.
 4523 rhaas                    5656                 :          */
 4523 rhaas                    5657 GIC      351348 :         if (*cp != '%')
 4523 rhaas                    5658 ECB             :         {
 4523 rhaas                    5659 CBC      321807 :             appendStringInfoCharMacro(&str, *cp);
                               5660          321816 :             continue;
                               5661                 :         }
                               5662                 : 
 3678 tgl                      5663 GIC       29541 :         ADVANCE_PARSE_POINTER(cp, end_ptr);
 4523 rhaas                    5664 ECB             : 
                               5665                 :         /* Easy case: %% outputs a single % */
 4523 rhaas                    5666 CBC       29541 :         if (*cp == '%')
                               5667                 :         {
                               5668               9 :             appendStringInfoCharMacro(&str, *cp);
 4523 rhaas                    5669 GBC           9 :             continue;
                               5670                 :         }
 4523 rhaas                    5671 ECB             : 
                               5672                 :         /* Parse the optional portions of the format specifier */
 3678 tgl                      5673 GIC       29532 :         cp = text_format_parse_format(cp, end_ptr,
                               5674                 :                                       &argpos, &widthpos,
                               5675                 :                                       &flags, &width);
                               5676                 : 
                               5677                 :         /*
 3678 tgl                      5678 ECB             :          * Next we should see the main conversion specifier.  Whether or not
                               5679                 :          * an argument position was present, it's known that at least one
                               5680                 :          * character remains in the string at this point.  Experience suggests
                               5681                 :          * that it's worth checking that that character is one of the expected
                               5682                 :          * ones before we try to fetch arguments, so as to produce the least
                               5683                 :          * confusing response to a mis-formatted specifier.
 4523 rhaas                    5684                 :          */
 3678 tgl                      5685 CBC       29520 :         if (strchr("sIL", *cp) == NULL)
 3678 tgl                      5686 GIC           3 :             ereport(ERROR,
                               5687                 :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                               5688                 :                      errmsg("unrecognized format() type specifier \"%.*s\"",
                               5689                 :                             pg_mblen(cp), cp),
                               5690                 :                      errhint("For a single \"%%\" use \"%%%%\".")));
 3678 tgl                      5691 ECB             : 
                               5692                 :         /* If indirect width was specified, get its value */
 3678 tgl                      5693 CBC       29517 :         if (widthpos >= 0)
                               5694                 :         {
                               5695                 :             /* Collect the specified or next argument position */
                               5696              21 :             if (widthpos > 0)
 3678 tgl                      5697 GIC          18 :                 arg = widthpos;
                               5698              21 :             if (arg >= nargs)
 4339 heikki.linnakangas       5699 LBC           0 :                 ereport(ERROR,
 3678 tgl                      5700 EUB             :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                               5701                 :                          errmsg("too few arguments for format()")));
 4523 rhaas                    5702                 : 
                               5703                 :             /* Get the value and type of the selected argument */
 3678 tgl                      5704 GIC          21 :             if (!funcvariadic)
                               5705                 :             {
                               5706              21 :                 value = PG_GETARG_DATUM(arg);
                               5707              21 :                 isNull = PG_ARGISNULL(arg);
                               5708              21 :                 typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
                               5709                 :             }
                               5710                 :             else
                               5711                 :             {
 3678 tgl                      5712 LBC           0 :                 value = elements[arg - 1];
                               5713               0 :                 isNull = nulls[arg - 1];
                               5714               0 :                 typid = element_type;
 3678 tgl                      5715 ECB             :             }
 3678 tgl                      5716 GIC          21 :             if (!OidIsValid(typid))
 3678 tgl                      5717 UIC           0 :                 elog(ERROR, "could not determine data type of format() input");
 4339 heikki.linnakangas       5718 ECB             : 
 3678 tgl                      5719 CBC          21 :             arg++;
                               5720                 : 
 3678 tgl                      5721 ECB             :             /* We can treat NULL width the same as zero */
 3678 tgl                      5722 GIC          21 :             if (isNull)
                               5723               3 :                 width = 0;
                               5724              18 :             else if (typid == INT4OID)
                               5725              18 :                 width = DatumGetInt32(value);
 3678 tgl                      5726 UIC           0 :             else if (typid == INT2OID)
                               5727               0 :                 width = DatumGetInt16(value);
                               5728                 :             else
                               5729                 :             {
                               5730                 :                 /* For less-usual datatypes, convert to text then to int */
                               5731                 :                 char       *str;
                               5732                 : 
                               5733               0 :                 if (typid != prev_width_type)
                               5734                 :                 {
 3678 tgl                      5735 ECB             :                     Oid         typoutputfunc;
                               5736                 :                     bool        typIsVarlena;
                               5737                 : 
 3678 tgl                      5738 LBC           0 :                     getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
                               5739               0 :                     fmgr_info(typoutputfunc, &typoutputinfo_width);
 3678 tgl                      5740 UIC           0 :                     prev_width_type = typid;
 3678 tgl                      5741 ECB             :                 }
                               5742                 : 
 3678 tgl                      5743 LBC           0 :                 str = OutputFunctionCall(&typoutputinfo_width, value);
                               5744                 : 
 1722 andres                   5745 ECB             :                 /* pg_strtoint32 will complain about bad data or overflow */
 1722 andres                   5746 LBC           0 :                 width = pg_strtoint32(str);
 3678 tgl                      5747 EUB             : 
 3678 tgl                      5748 UIC           0 :                 pfree(str);
                               5749                 :             }
 4523 rhaas                    5750 ECB             :         }
                               5751                 : 
                               5752                 :         /* Collect the specified or next argument position */
 3678 tgl                      5753 GIC       29517 :         if (argpos > 0)
 3678 tgl                      5754 CBC          66 :             arg = argpos;
                               5755           29517 :         if (arg >= nargs)
 4523 rhaas                    5756 GIC          12 :             ereport(ERROR,
 4523 rhaas                    5757 ECB             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                               5758                 :                      errmsg("too few arguments for format()")));
                               5759                 : 
                               5760                 :         /* Get the value and type of the selected argument */
 3726 tgl                      5761 GIC       29505 :         if (!funcvariadic)
                               5762                 :         {
                               5763           28869 :             value = PG_GETARG_DATUM(arg);
                               5764           28869 :             isNull = PG_ARGISNULL(arg);
                               5765           28869 :             typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
                               5766                 :         }
                               5767                 :         else
                               5768                 :         {
                               5769             636 :             value = elements[arg - 1];
                               5770             636 :             isNull = nulls[arg - 1];
                               5771             636 :             typid = element_type;
                               5772                 :         }
                               5773           29505 :         if (!OidIsValid(typid))
 3726 tgl                      5774 UIC           0 :             elog(ERROR, "could not determine data type of format() input");
                               5775                 : 
 3678 tgl                      5776 GIC       29505 :         arg++;
                               5777                 : 
                               5778                 :         /*
                               5779                 :          * Get the appropriate typOutput function, reusing previous one if
                               5780                 :          * same type as previous argument.  That's particularly useful in the
                               5781                 :          * variadic-array case, but often saves work even for ordinary calls.
                               5782                 :          */
 3726                          5783           29505 :         if (typid != prev_type)
 3726 tgl                      5784 ECB             :         {
                               5785                 :             Oid         typoutputfunc;
                               5786                 :             bool        typIsVarlena;
                               5787                 : 
 3726 tgl                      5788 CBC       13980 :             getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
 3726 tgl                      5789 GIC       13980 :             fmgr_info(typoutputfunc, &typoutputfinfo);
                               5790           13980 :             prev_type = typid;
                               5791                 :         }
 3726 tgl                      5792 ECB             : 
 4523 rhaas                    5793                 :         /*
 3678 tgl                      5794                 :          * And now we can format the value.
 4523 rhaas                    5795                 :          */
 4523 rhaas                    5796 GIC       29505 :         switch (*cp)
                               5797                 :         {
 4523 rhaas                    5798 CBC       29505 :             case 's':
                               5799                 :             case 'I':
 4523 rhaas                    5800 ECB             :             case 'L':
 3726 tgl                      5801 GIC       29505 :                 text_format_string_conversion(&str, *cp, &typoutputfinfo,
                               5802                 :                                               value, isNull,
 3678 tgl                      5803 ECB             :                                               flags, width);
 4523 rhaas                    5804 CBC       29502 :                 break;
 4523 rhaas                    5805 UIC           0 :             default:
                               5806                 :                 /* should not get here, because of previous check */
 4523 rhaas                    5807 LBC           0 :                 ereport(ERROR,
                               5808                 :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 1014 tgl                      5809 ECB             :                          errmsg("unrecognized format() type specifier \"%.*s\"",
                               5810                 :                                 pg_mblen(cp), cp),
                               5811                 :                          errhint("For a single \"%%\" use \"%%%%\".")));
                               5812                 :                 break;
 4523 rhaas                    5813                 :         }
                               5814                 :     }
                               5815                 : 
                               5816                 :     /* Don't need deconstruct_array results anymore. */
 3726 tgl                      5817 CBC       12501 :     if (elements != NULL)
 3726 tgl                      5818 GIC          21 :         pfree(elements);
 3726 tgl                      5819 CBC       12501 :     if (nulls != NULL)
                               5820              21 :         pfree(nulls);
                               5821                 : 
                               5822                 :     /* Generate results. */
 4523 rhaas                    5823           12501 :     result = cstring_to_text_with_len(str.data, str.len);
 4523 rhaas                    5824 GIC       12501 :     pfree(str.data);
                               5825                 : 
 4523 rhaas                    5826 CBC       12501 :     PG_RETURN_TEXT_P(result);
 4523 rhaas                    5827 ECB             : }
                               5828                 : 
                               5829                 : /*
 3678 tgl                      5830                 :  * Parse contiguous digits as a decimal number.
 3678 tgl                      5831 EUB             :  *
                               5832                 :  * Returns true if some digits could be parsed.
                               5833                 :  * The value is returned into *value, and *ptr is advanced to the next
                               5834                 :  * character to be parsed.
 3678 tgl                      5835 ECB             :  *
                               5836                 :  * Note parsing invariant: at least one character is known available before
                               5837                 :  * string end (end_ptr) at entry, and this is still true at exit.
                               5838                 :  */
                               5839                 : static bool
 3678 tgl                      5840 GIC       59046 : text_format_parse_digits(const char **ptr, const char *end_ptr, int *value)
 3678 tgl                      5841 ECB             : {
 3678 tgl                      5842 GIC       59046 :     bool        found = false;
                               5843           59046 :     const char *cp = *ptr;
 3678 tgl                      5844 CBC       59046 :     int         val = 0;
                               5845                 : 
 3678 tgl                      5846 GIC       59202 :     while (*cp >= '0' && *cp <= '9')
                               5847                 :     {
 1944 andres                   5848             159 :         int8        digit = (*cp - '0');
 3678 tgl                      5849 ECB             : 
 1944 andres                   5850 CBC         159 :         if (unlikely(pg_mul_s32_overflow(val, 10, &val)) ||
 1944 andres                   5851 GIC         159 :             unlikely(pg_add_s32_overflow(val, digit, &val)))
 3678 tgl                      5852 UIC           0 :             ereport(ERROR,
                               5853                 :                     (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
 3678 tgl                      5854 ECB             :                      errmsg("number is out of range")));
 3678 tgl                      5855 GIC         159 :         ADVANCE_PARSE_POINTER(cp, end_ptr);
                               5856             156 :         found = true;
                               5857                 :     }
                               5858                 : 
                               5859           59043 :     *ptr = cp;
                               5860           59043 :     *value = val;
 3678 tgl                      5861 ECB             : 
 3678 tgl                      5862 GIC       59043 :     return found;
                               5863                 : }
                               5864                 : 
                               5865                 : /*
                               5866                 :  * Parse a format specifier (generally following the SUS printf spec).
                               5867                 :  *
                               5868                 :  * We have already advanced over the initial '%', and we are looking for
 3678 tgl                      5869 ECB             :  * [argpos][flags][width]type (but the type character is not consumed here).
                               5870                 :  *
                               5871                 :  * Inputs are start_ptr (the position after '%') and end_ptr (string end + 1).
                               5872                 :  * Output parameters:
 3260 bruce                    5873                 :  *  argpos: argument position for value to be printed.  -1 means unspecified.
                               5874                 :  *  widthpos: argument position for width.  Zero means the argument position
 3678 tgl                      5875                 :  *          was unspecified (ie, take the next arg) and -1 means no width
                               5876                 :  *          argument (width was omitted or specified as a constant).
                               5877                 :  *  flags: bitmask of flags.
                               5878                 :  *  width: directly-specified width value.  Zero means the width was omitted
                               5879                 :  *          (note it's not necessary to distinguish this case from an explicit
                               5880                 :  *          zero width value).
                               5881                 :  *
                               5882                 :  * The function result is the next character position to be parsed, ie, the
                               5883                 :  * location where the type character is/should be.
                               5884                 :  *
                               5885                 :  * Note parsing invariant: at least one character is known available before
                               5886                 :  * string end (end_ptr) at entry, and this is still true at exit.
                               5887                 :  */
                               5888                 : static const char *
 3678 tgl                      5889 CBC       29532 : text_format_parse_format(const char *start_ptr, const char *end_ptr,
                               5890                 :                          int *argpos, int *widthpos,
 3678 tgl                      5891 ECB             :                          int *flags, int *width)
                               5892                 : {
 3678 tgl                      5893 CBC       29532 :     const char *cp = start_ptr;
                               5894                 :     int         n;
 3678 tgl                      5895 ECB             : 
                               5896                 :     /* set defaults for output parameters */
 3678 tgl                      5897 CBC       29532 :     *argpos = -1;
 3678 tgl                      5898 GIC       29532 :     *widthpos = -1;
                               5899           29532 :     *flags = 0;
 3678 tgl                      5900 CBC       29532 :     *width = 0;
                               5901                 : 
                               5902                 :     /* try to identify first number */
                               5903           29532 :     if (text_format_parse_digits(&cp, end_ptr, &n))
                               5904                 :     {
 3678 tgl                      5905 GIC          87 :         if (*cp != '$')
                               5906                 :         {
                               5907                 :             /* Must be just a width and a type, so we're done */
                               5908              12 :             *width = n;
                               5909              12 :             return cp;
 3678 tgl                      5910 ECB             :         }
                               5911                 :         /* The number was argument position */
 3678 tgl                      5912 GIC          75 :         *argpos = n;
 3678 tgl                      5913 ECB             :         /* Explicit 0 for argument index is immediately refused */
 3678 tgl                      5914 GIC          75 :         if (n == 0)
                               5915               3 :             ereport(ERROR,
                               5916                 :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 3678 tgl                      5917 ECB             :                      errmsg("format specifies argument 0, but arguments are numbered from 1")));
 3678 tgl                      5918 GIC          72 :         ADVANCE_PARSE_POINTER(cp, end_ptr);
 3678 tgl                      5919 ECB             :     }
                               5920                 : 
                               5921                 :     /* Handle flags (only minus is supported now) */
 3678 tgl                      5922 GIC       29529 :     while (*cp == '-')
 3678 tgl                      5923 ECB             :     {
 3678 tgl                      5924 GIC          15 :         *flags |= TEXT_FORMAT_FLAG_MINUS;
                               5925              15 :         ADVANCE_PARSE_POINTER(cp, end_ptr);
 3678 tgl                      5926 ECB             :     }
                               5927                 : 
 3678 tgl                      5928 CBC       29514 :     if (*cp == '*')
 3678 tgl                      5929 EUB             :     {
                               5930                 :         /* Handle indirect width */
 3678 tgl                      5931 GIC          24 :         ADVANCE_PARSE_POINTER(cp, end_ptr);
 3678 tgl                      5932 CBC          24 :         if (text_format_parse_digits(&cp, end_ptr, &n))
                               5933                 :         {
 3678 tgl                      5934 ECB             :             /* number in this position must be closed by $ */
 3678 tgl                      5935 CBC          21 :             if (*cp != '$')
 3678 tgl                      5936 UIC           0 :                 ereport(ERROR,
 3678 tgl                      5937 ECB             :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 2118                          5938                 :                          errmsg("width argument position must be ended by \"$\"")));
                               5939                 :             /* The number was width argument position */
 3678 tgl                      5940 GIC          21 :             *widthpos = n;
 3678 tgl                      5941 ECB             :             /* Explicit 0 for argument index is immediately refused */
 3678 tgl                      5942 CBC          21 :             if (n == 0)
                               5943               3 :                 ereport(ERROR,
                               5944                 :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                               5945                 :                          errmsg("format specifies argument 0, but arguments are numbered from 1")));
 3678 tgl                      5946 GIC          18 :             ADVANCE_PARSE_POINTER(cp, end_ptr);
                               5947                 :         }
 3678 tgl                      5948 ECB             :         else
 3678 tgl                      5949 CBC           3 :             *widthpos = 0;      /* width's argument position is unspecified */
 3678 tgl                      5950 ECB             :     }
                               5951                 :     else
                               5952                 :     {
                               5953                 :         /* Check for direct width specification */
 3678 tgl                      5954 GIC       29490 :         if (text_format_parse_digits(&cp, end_ptr, &n))
                               5955              15 :             *width = n;
                               5956                 :     }
                               5957                 : 
                               5958                 :     /* cp should now be pointing at type character */
                               5959           29508 :     return cp;
                               5960                 : }
                               5961                 : 
 3678 tgl                      5962 ECB             : /*
                               5963                 :  * Format a %s, %I, or %L conversion
                               5964                 :  */
                               5965                 : static void
 4523 rhaas                    5966 GIC       29505 : text_format_string_conversion(StringInfo buf, char conversion,
                               5967                 :                               FmgrInfo *typOutputInfo,
                               5968                 :                               Datum value, bool isNull,
                               5969                 :                               int flags, int width)
                               5970                 : {
                               5971                 :     char       *str;
 4523 rhaas                    5972 EUB             : 
                               5973                 :     /* Handle NULL arguments before trying to stringify the value. */
 4523 rhaas                    5974 GBC       29505 :     if (isNull)
                               5975                 :     {
 3678 tgl                      5976             153 :         if (conversion == 's')
                               5977             117 :             text_format_append_string(buf, "", flags, width);
                               5978              36 :         else if (conversion == 'L')
 3678 tgl                      5979 GIC          33 :             text_format_append_string(buf, "NULL", flags, width);
 4523 rhaas                    5980 GBC           3 :         else if (conversion == 'I')
 4382 bruce                    5981 GIC           3 :             ereport(ERROR,
                               5982                 :                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
                               5983                 :                      errmsg("null values cannot be formatted as an SQL identifier")));
 4523 rhaas                    5984             150 :         return;
                               5985                 :     }
                               5986                 : 
                               5987                 :     /* Stringify. */
 3726 tgl                      5988           29352 :     str = OutputFunctionCall(typOutputInfo, value);
                               5989                 : 
                               5990                 :     /* Escape. */
 4523 rhaas                    5991           29352 :     if (conversion == 'I')
                               5992                 :     {
                               5993                 :         /* quote_identifier may or may not allocate a new string. */
 3678 tgl                      5994            1553 :         text_format_append_string(buf, quote_identifier(str), flags, width);
                               5995                 :     }
 4523 rhaas                    5996           27799 :     else if (conversion == 'L')
                               5997                 :     {
 4382 bruce                    5998            1289 :         char       *qstr = quote_literal_cstr(str);
                               5999                 : 
 3678 tgl                      6000            1289 :         text_format_append_string(buf, qstr, flags, width);
                               6001                 :         /* quote_literal_cstr() always allocates a new string */
 4523 rhaas                    6002            1289 :         pfree(qstr);
                               6003                 :     }
                               6004                 :     else
 3678 tgl                      6005           26510 :         text_format_append_string(buf, str, flags, width);
                               6006                 : 
                               6007                 :     /* Cleanup. */
 4523 rhaas                    6008 CBC       29352 :     pfree(str);
                               6009                 : }
 4523 rhaas                    6010 ECB             : 
 3678 tgl                      6011                 : /*
                               6012                 :  * Append str to buf, padding as directed by flags/width
                               6013                 :  */
                               6014                 : static void
 3678 tgl                      6015 CBC       29502 : text_format_append_string(StringInfo buf, const char *str,
 3678 tgl                      6016 ECB             :                           int flags, int width)
                               6017                 : {
 3678 tgl                      6018 GIC       29502 :     bool        align_to_left = false;
                               6019                 :     int         len;
                               6020                 : 
                               6021                 :     /* fast path for typical easy case */
                               6022           29502 :     if (width == 0)
                               6023                 :     {
                               6024           29460 :         appendStringInfoString(buf, str);
                               6025           29460 :         return;
                               6026                 :     }
                               6027                 : 
 3678 tgl                      6028 CBC          42 :     if (width < 0)
                               6029                 :     {
                               6030                 :         /* Negative width: implicit '-' flag, then take absolute value */
 3678 tgl                      6031 GIC           3 :         align_to_left = true;
 3678 tgl                      6032 ECB             :         /* -INT_MIN is undefined */
 3678 tgl                      6033 GIC           3 :         if (width <= INT_MIN)
 3678 tgl                      6034 LBC           0 :             ereport(ERROR,
 3678 tgl                      6035 ECB             :                     (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
 3678 tgl                      6036 EUB             :                      errmsg("number is out of range")));
 3678 tgl                      6037 GIC           3 :         width = -width;
                               6038                 :     }
                               6039              39 :     else if (flags & TEXT_FORMAT_FLAG_MINUS)
                               6040              12 :         align_to_left = true;
                               6041                 : 
 3678 tgl                      6042 CBC          42 :     len = pg_mbstrlen(str);
                               6043              42 :     if (align_to_left)
 3678 tgl                      6044 EUB             :     {
                               6045                 :         /* left justify */
 3678 tgl                      6046 CBC          15 :         appendStringInfoString(buf, str);
                               6047              15 :         if (len < width)
 3678 tgl                      6048 GIC          15 :             appendStringInfoSpaces(buf, width - len);
 3678 tgl                      6049 ECB             :     }
                               6050                 :     else
                               6051                 :     {
                               6052                 :         /* right justify */
 3678 tgl                      6053 CBC          27 :         if (len < width)
                               6054              27 :             appendStringInfoSpaces(buf, width - len);
 3678 tgl                      6055 GIC          27 :         appendStringInfoString(buf, str);
                               6056                 :     }
                               6057                 : }
                               6058                 : 
                               6059                 : /*
                               6060                 :  * text_format_nv - nonvariadic wrapper for text_format function.
                               6061                 :  *
                               6062                 :  * note: this wrapper is necessary to pass the sanity check in opr_sanity,
 4339 heikki.linnakangas       6063 ECB             :  * which checks that all built-in functions that share the implementing C
                               6064                 :  * function take the same number of arguments.
 4523 rhaas                    6065                 :  */
                               6066                 : Datum
 4523 rhaas                    6067 CBC          15 : text_format_nv(PG_FUNCTION_ARGS)
                               6068                 : {
 4523 rhaas                    6069 GIC          15 :     return text_format(fcinfo);
                               6070                 : }
                               6071                 : 
                               6072                 : /*
                               6073                 :  * Helper function for Levenshtein distance functions. Faster than memcmp(),
                               6074                 :  * for this use case.
                               6075                 :  */
 3069 rhaas                    6076 ECB             : static inline bool
 3069 rhaas                    6077 UIC           0 : rest_of_char_same(const char *s1, const char *s2, int len)
 3069 rhaas                    6078 ECB             : {
 3069 rhaas                    6079 UIC           0 :     while (len > 0)
                               6080                 :     {
                               6081               0 :         len--;
                               6082               0 :         if (s1[len] != s2[len])
 3069 rhaas                    6083 LBC           0 :             return false;
 3069 rhaas                    6084 EUB             :     }
 3069 rhaas                    6085 UIC           0 :     return true;
                               6086                 : }
                               6087                 : 
 3069 rhaas                    6088 ECB             : /* Expand each Levenshtein distance variant */
                               6089                 : #include "levenshtein.c"
                               6090                 : #define LEVENSHTEIN_LESS_EQUAL
                               6091                 : #include "levenshtein.c"
 1109 peter                    6092                 : 
                               6093                 : 
                               6094                 : /*
                               6095                 :  * The following *ClosestMatch() functions can be used to determine whether a
                               6096                 :  * user-provided string resembles any known valid values, which is useful for
                               6097                 :  * providing hints in log messages, among other things.  Use these functions
                               6098                 :  * like so:
                               6099                 :  *
                               6100                 :  *      initClosestMatch(&state, source_string, max_distance);
                               6101                 :  *
                               6102                 :  *      for (int i = 0; i < num_valid_strings; i++)
                               6103                 :  *          updateClosestMatch(&state, valid_strings[i]);
                               6104                 :  *
                               6105                 :  *      closestMatch = getClosestMatch(&state);
                               6106                 :  */
                               6107                 : 
                               6108                 : /*
                               6109                 :  * Initialize the given state with the source string and maximum Levenshtein
                               6110                 :  * distance to consider.
                               6111                 :  */
                               6112                 : void
  205 peter                    6113 GNC          28 : initClosestMatch(ClosestMatchState *state, const char *source, int max_d)
                               6114                 : {
                               6115              28 :     Assert(state);
                               6116              28 :     Assert(max_d >= 0);
                               6117                 : 
                               6118              28 :     state->source = source;
                               6119              28 :     state->min_d = -1;
                               6120              28 :     state->max_d = max_d;
                               6121              28 :     state->match = NULL;
                               6122              28 : }
                               6123                 : 
                               6124                 : /*
                               6125                 :  * If the candidate string is a closer match than the current one saved (or
                               6126                 :  * there is no match saved), save it as the closest match.
                               6127                 :  *
                               6128                 :  * If the source or candidate string is NULL, empty, or too long, this function
                               6129                 :  * takes no action.  Likewise, if the Levenshtein distance exceeds the maximum
                               6130                 :  * allowed or more than half the characters are different, no action is taken.
                               6131                 :  */
                               6132                 : void
                               6133             159 : updateClosestMatch(ClosestMatchState *state, const char *candidate)
                               6134                 : {
                               6135                 :     int         dist;
                               6136                 : 
                               6137             159 :     Assert(state);
                               6138                 : 
                               6139             159 :     if (state->source == NULL || state->source[0] == '\0' ||
                               6140             159 :         candidate == NULL || candidate[0] == '\0')
  205 peter                    6141 UNC           0 :         return;
                               6142                 : 
                               6143                 :     /*
                               6144                 :      * To avoid ERROR-ing, we check the lengths here instead of setting
                               6145                 :      * 'trusted' to false in the call to varstr_levenshtein_less_equal().
                               6146                 :      */
  205 peter                    6147 GNC         159 :     if (strlen(state->source) > MAX_LEVENSHTEIN_STRLEN ||
                               6148             159 :         strlen(candidate) > MAX_LEVENSHTEIN_STRLEN)
  205 peter                    6149 UNC           0 :         return;
                               6150                 : 
  205 peter                    6151 GNC         159 :     dist = varstr_levenshtein_less_equal(state->source, strlen(state->source),
                               6152             159 :                                          candidate, strlen(candidate), 1, 1, 1,
                               6153                 :                                          state->max_d, true);
                               6154             159 :     if (dist <= state->max_d &&
                               6155              28 :         dist <= strlen(state->source) / 2 &&
                               6156               7 :         (state->min_d == -1 || dist < state->min_d))
                               6157                 :     {
                               6158               7 :         state->min_d = dist;
                               6159               7 :         state->match = candidate;
                               6160                 :     }
                               6161                 : }
                               6162                 : 
                               6163                 : /*
                               6164                 :  * Return the closest match.  If no suitable candidates were provided via
                               6165                 :  * updateClosestMatch(), return NULL.
                               6166                 :  */
                               6167                 : const char *
                               6168              28 : getClosestMatch(ClosestMatchState *state)
                               6169                 : {
                               6170              28 :     Assert(state);
                               6171                 : 
                               6172              28 :     return state->match;
                               6173                 : }
                               6174                 : 
                               6175                 : 
 1109 peter                    6176 ECB             : /*
                               6177                 :  * Unicode support
                               6178                 :  */
                               6179                 : 
                               6180                 : static UnicodeNormalizationForm
 1109 peter                    6181 GIC          93 : unicode_norm_form_from_string(const char *formstr)
                               6182                 : {
 1109 peter                    6183 CBC          93 :     UnicodeNormalizationForm form = -1;
                               6184                 : 
                               6185                 :     /*
                               6186                 :      * Might as well check this while we're here.
 1109 peter                    6187 ECB             :      */
 1109 peter                    6188 GIC          93 :     if (GetDatabaseEncoding() != PG_UTF8)
 1109 peter                    6189 LBC           0 :         ereport(ERROR,
 1109 peter                    6190 ECB             :                 (errcode(ERRCODE_SYNTAX_ERROR),
                               6191                 :                  errmsg("Unicode normalization can only be performed if server encoding is UTF8")));
                               6192                 : 
 1109 peter                    6193 GIC          93 :     if (pg_strcasecmp(formstr, "NFC") == 0)
                               6194              33 :         form = UNICODE_NFC;
                               6195              60 :     else if (pg_strcasecmp(formstr, "NFD") == 0)
                               6196              18 :         form = UNICODE_NFD;
                               6197              42 :     else if (pg_strcasecmp(formstr, "NFKC") == 0)
                               6198              18 :         form = UNICODE_NFKC;
 1109 peter                    6199 CBC          24 :     else if (pg_strcasecmp(formstr, "NFKD") == 0)
 1109 peter                    6200 GIC          18 :         form = UNICODE_NFKD;
                               6201                 :     else
 1109 peter                    6202 CBC           6 :         ereport(ERROR,
 1109 peter                    6203 ECB             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                               6204                 :                  errmsg("invalid normalization form: %s", formstr)));
                               6205                 : 
 1109 peter                    6206 GIC          87 :     return form;
 1109 peter                    6207 ECB             : }
                               6208                 : 
                               6209                 : Datum
 1109 peter                    6210 CBC          24 : unicode_normalize_func(PG_FUNCTION_ARGS)
 1109 peter                    6211 ECB             : {
 1109 peter                    6212 GIC          24 :     text       *input = PG_GETARG_TEXT_PP(0);
                               6213              24 :     char       *formstr = text_to_cstring(PG_GETARG_TEXT_PP(1));
 1109 peter                    6214 ECB             :     UnicodeNormalizationForm form;
                               6215                 :     int         size;
                               6216                 :     pg_wchar   *input_chars;
                               6217                 :     pg_wchar   *output_chars;
                               6218                 :     unsigned char *p;
                               6219                 :     text       *result;
                               6220                 :     int         i;
                               6221                 : 
 1109 peter                    6222 CBC          24 :     form = unicode_norm_form_from_string(formstr);
 1109 peter                    6223 ECB             : 
                               6224                 :     /* convert to pg_wchar */
 1109 peter                    6225 GIC          21 :     size = pg_mbstrlen_with_len(VARDATA_ANY(input), VARSIZE_ANY_EXHDR(input));
 1109 peter                    6226 CBC          21 :     input_chars = palloc((size + 1) * sizeof(pg_wchar));
                               6227              21 :     p = (unsigned char *) VARDATA_ANY(input);
 1109 peter                    6228 GIC          84 :     for (i = 0; i < size; i++)
 1109 peter                    6229 ECB             :     {
 1109 peter                    6230 CBC          63 :         input_chars[i] = utf8_to_unicode(p);
 1109 peter                    6231 GIC          63 :         p += pg_utf_mblen(p);
 1109 peter                    6232 ECB             :     }
 1109 peter                    6233 CBC          21 :     input_chars[i] = (pg_wchar) '\0';
 1109 peter                    6234 GIC          21 :     Assert((char *) p == VARDATA_ANY(input) + VARSIZE_ANY_EXHDR(input));
 1109 peter                    6235 ECB             : 
                               6236                 :     /* action */
 1109 peter                    6237 CBC          21 :     output_chars = unicode_normalize(form, input_chars);
                               6238                 : 
                               6239                 :     /* convert back to UTF-8 string */
 1109 peter                    6240 GIC          21 :     size = 0;
                               6241              81 :     for (pg_wchar *wp = output_chars; *wp; wp++)
                               6242                 :     {
                               6243                 :         unsigned char buf[4];
                               6244                 : 
                               6245              60 :         unicode_to_utf8(*wp, buf);
                               6246              60 :         size += pg_utf_mblen(buf);
                               6247                 :     }
                               6248                 : 
                               6249              21 :     result = palloc(size + VARHDRSZ);
                               6250              21 :     SET_VARSIZE(result, size + VARHDRSZ);
                               6251                 : 
                               6252              21 :     p = (unsigned char *) VARDATA_ANY(result);
 1109 peter                    6253 CBC          81 :     for (pg_wchar *wp = output_chars; *wp; wp++)
                               6254                 :     {
                               6255              60 :         unicode_to_utf8(*wp, p);
                               6256              60 :         p += pg_utf_mblen(p);
                               6257                 :     }
 1109 peter                    6258 GIC          21 :     Assert((char *) p == (char *) result + size + VARHDRSZ);
                               6259                 : 
                               6260              21 :     PG_RETURN_TEXT_P(result);
                               6261                 : }
                               6262                 : 
                               6263                 : /*
                               6264                 :  * Check whether the string is in the specified Unicode normalization form.
                               6265                 :  *
                               6266                 :  * This is done by converting the string to the specified normal form and then
 1109 peter                    6267 ECB             :  * comparing that to the original string.  To speed that up, we also apply the
                               6268                 :  * "quick check" algorithm specified in UAX #15, which can give a yes or no
                               6269                 :  * answer for many strings by just scanning the string once.
                               6270                 :  *
                               6271                 :  * This function should generally be optimized for the case where the string
                               6272                 :  * is in fact normalized.  In that case, we'll end up looking at the entire
                               6273                 :  * string, so it's probably not worth doing any incremental conversion etc.
                               6274                 :  */
                               6275                 : Datum
 1109 peter                    6276 CBC          69 : unicode_is_normalized(PG_FUNCTION_ARGS)
                               6277                 : {
                               6278              69 :     text       *input = PG_GETARG_TEXT_PP(0);
                               6279              69 :     char       *formstr = text_to_cstring(PG_GETARG_TEXT_PP(1));
                               6280                 :     UnicodeNormalizationForm form;
                               6281                 :     int         size;
 1109 peter                    6282 ECB             :     pg_wchar   *input_chars;
                               6283                 :     pg_wchar   *output_chars;
                               6284                 :     unsigned char *p;
                               6285                 :     int         i;
                               6286                 :     UnicodeNormalizationQC quickcheck;
                               6287                 :     int         output_size;
                               6288                 :     bool        result;
                               6289                 : 
 1109 peter                    6290 GIC          69 :     form = unicode_norm_form_from_string(formstr);
 1109 peter                    6291 ECB             : 
                               6292                 :     /* convert to pg_wchar */
 1109 peter                    6293 CBC          66 :     size = pg_mbstrlen_with_len(VARDATA_ANY(input), VARSIZE_ANY_EXHDR(input));
 1109 peter                    6294 GIC          66 :     input_chars = palloc((size + 1) * sizeof(pg_wchar));
 1109 peter                    6295 CBC          66 :     p = (unsigned char *) VARDATA_ANY(input);
                               6296             252 :     for (i = 0; i < size; i++)
                               6297                 :     {
                               6298             186 :         input_chars[i] = utf8_to_unicode(p);
 1109 peter                    6299 GIC         186 :         p += pg_utf_mblen(p);
                               6300                 :     }
                               6301              66 :     input_chars[i] = (pg_wchar) '\0';
                               6302              66 :     Assert((char *) p == VARDATA_ANY(input) + VARSIZE_ANY_EXHDR(input));
                               6303                 : 
                               6304                 :     /* quick check (see UAX #15) */
 1109 peter                    6305 CBC          66 :     quickcheck = unicode_is_normalized_quickcheck(form, input_chars);
 1109 peter                    6306 GIC          66 :     if (quickcheck == UNICODE_NORM_QC_YES)
 1109 peter                    6307 CBC          21 :         PG_RETURN_BOOL(true);
                               6308              45 :     else if (quickcheck == UNICODE_NORM_QC_NO)
                               6309               6 :         PG_RETURN_BOOL(false);
                               6310                 : 
 1109 peter                    6311 ECB             :     /* normalize and compare with original */
 1109 peter                    6312 GIC          39 :     output_chars = unicode_normalize(form, input_chars);
                               6313                 : 
                               6314              39 :     output_size = 0;
 1109 peter                    6315 CBC         162 :     for (pg_wchar *wp = output_chars; *wp; wp++)
 1109 peter                    6316 GIC         123 :         output_size++;
 1109 peter                    6317 ECB             : 
 1109 peter                    6318 CBC          57 :     result = (size == output_size) &&
                               6319              18 :         (memcmp(input_chars, output_chars, size * sizeof(pg_wchar)) == 0);
 1109 peter                    6320 ECB             : 
 1109 peter                    6321 CBC          39 :     PG_RETURN_BOOL(result);
 1109 peter                    6322 ECB             : }
  742 peter                    6323 EUB             : 
                               6324                 : /*
                               6325                 :  * Check if first n chars are hexadecimal digits
                               6326                 :  */
                               6327                 : static bool
  742 peter                    6328 GIC          78 : isxdigits_n(const char *instr, size_t n)
                               6329                 : {
                               6330             330 :     for (size_t i = 0; i < n; i++)
  742 peter                    6331 CBC         285 :         if (!isxdigit((unsigned char) instr[i]))
  742 peter                    6332 GIC          33 :             return false;
  742 peter                    6333 ECB             : 
  742 peter                    6334 GIC          45 :     return true;
  742 peter                    6335 ECB             : }
                               6336                 : 
                               6337                 : static unsigned int
  742 peter                    6338 CBC         252 : hexval(unsigned char c)
                               6339                 : {
  742 peter                    6340 GIC         252 :     if (c >= '0' && c <= '9')
                               6341             192 :         return c - '0';
                               6342              60 :     if (c >= 'a' && c <= 'f')
                               6343              30 :         return c - 'a' + 0xA;
                               6344              30 :     if (c >= 'A' && c <= 'F')
  742 peter                    6345 CBC          30 :         return c - 'A' + 0xA;
  742 peter                    6346 UIC           0 :     elog(ERROR, "invalid hexadecimal digit");
  742 peter                    6347 ECB             :     return 0;                   /* not reached */
                               6348                 : }
                               6349                 : 
                               6350                 : /*
                               6351                 :  * Translate string with hexadecimal digits to number
                               6352                 :  */
                               6353                 : static unsigned int
  742 peter                    6354 GIC          45 : hexval_n(const char *instr, size_t n)
  742 peter                    6355 ECB             : {
  742 peter                    6356 CBC          45 :     unsigned int result = 0;
                               6357                 : 
                               6358             297 :     for (size_t i = 0; i < n; i++)
  742 peter                    6359 GIC         252 :         result += hexval(instr[i]) << (4 * (n - i - 1));
  742 peter                    6360 ECB             : 
  742 peter                    6361 GIC          45 :     return result;
  742 peter                    6362 ECB             : }
                               6363                 : 
                               6364                 : /*
                               6365                 :  * Replaces Unicode escape sequences by Unicode characters
                               6366                 :  */
                               6367                 : Datum
  742 peter                    6368 GBC          33 : unistr(PG_FUNCTION_ARGS)
  742 peter                    6369 ECB             : {
  742 peter                    6370 CBC          33 :     text       *input_text = PG_GETARG_TEXT_PP(0);
  742 peter                    6371 ECB             :     char       *instr;
                               6372                 :     int         len;
                               6373                 :     StringInfoData str;
                               6374                 :     text       *result;
  742 peter                    6375 CBC          33 :     pg_wchar    pair_first = 0;
                               6376                 :     char        cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
  742 peter                    6377 ECB             : 
  742 peter                    6378 GIC          33 :     instr = VARDATA_ANY(input_text);
  742 peter                    6379 CBC          33 :     len = VARSIZE_ANY_EXHDR(input_text);
                               6380                 : 
                               6381              33 :     initStringInfo(&str);
  742 peter                    6382 EUB             : 
  742 peter                    6383 GIC         255 :     while (len > 0)
                               6384                 :     {
                               6385             243 :         if (instr[0] == '\\')
  742 peter                    6386 ECB             :         {
  742 peter                    6387 GIC          51 :             if (len >= 2 &&
  742 peter                    6388 CBC          51 :                 instr[1] == '\\')
                               6389                 :             {
  742 peter                    6390 GBC           3 :                 if (pair_first)
  742 peter                    6391 UBC           0 :                     goto invalid_pair;
  742 peter                    6392 GIC           3 :                 appendStringInfoChar(&str, '\\');
                               6393               3 :                 instr += 2;
  742 peter                    6394 CBC           3 :                 len -= 2;
                               6395                 :             }
                               6396              48 :             else if ((len >= 5 && isxdigits_n(instr + 1, 4)) ||
  742 peter                    6397 GBC          33 :                      (len >= 6 && instr[1] == 'u' && isxdigits_n(instr + 2, 4)))
  742 peter                    6398 GIC          15 :             {
  742 peter                    6399 ECB             :                 pg_wchar    unicode;
  742 peter                    6400 CBC          21 :                 int         offset = instr[1] == 'u' ? 2 : 1;
                               6401                 : 
  742 peter                    6402 GIC          21 :                 unicode = hexval_n(instr + offset, 4);
  742 peter                    6403 ECB             : 
  742 peter                    6404 CBC          21 :                 if (!is_valid_unicode_codepoint(unicode))
  742 peter                    6405 UIC           0 :                     ereport(ERROR,
                               6406                 :                             errcode(ERRCODE_INVALID_PARAMETER_VALUE),
  742 peter                    6407 ECB             :                             errmsg("invalid Unicode code point: %04X", unicode));
                               6408                 : 
  742 peter                    6409 GIC          21 :                 if (pair_first)
  742 peter                    6410 ECB             :                 {
  742 peter                    6411 CBC           6 :                     if (is_utf16_surrogate_second(unicode))
                               6412                 :                     {
  742 peter                    6413 UIC           0 :                         unicode = surrogate_pair_to_codepoint(pair_first, unicode);
  742 peter                    6414 LBC           0 :                         pair_first = 0;
                               6415                 :                     }
  742 peter                    6416 ECB             :                     else
  742 peter                    6417 CBC           6 :                         goto invalid_pair;
                               6418                 :                 }
  742 peter                    6419 GIC          15 :                 else if (is_utf16_surrogate_second(unicode))
  742 peter                    6420 UIC           0 :                     goto invalid_pair;
  742 peter                    6421 ECB             : 
  742 peter                    6422 GIC          15 :                 if (is_utf16_surrogate_first(unicode))
  742 peter                    6423 CBC           9 :                     pair_first = unicode;
                               6424                 :                 else
  742 peter                    6425 EUB             :                 {
  742 peter                    6426 GBC           6 :                     pg_unicode_to_server(unicode, (unsigned char *) cbuf);
  742 peter                    6427 GIC           6 :                     appendStringInfoString(&str, cbuf);
                               6428                 :                 }
  742 peter                    6429 ECB             : 
  742 peter                    6430 GIC          15 :                 instr += 4 + offset;
  742 peter                    6431 CBC          15 :                 len -= 4 + offset;
  742 peter                    6432 EUB             :             }
  742 peter                    6433 GIC          27 :             else if (len >= 8 && instr[1] == '+' && isxdigits_n(instr + 2, 6))
  742 peter                    6434 CBC           6 :             {
  742 peter                    6435 ECB             :                 pg_wchar    unicode;
                               6436                 : 
  742 peter                    6437 GIC          12 :                 unicode = hexval_n(instr + 2, 6);
  742 peter                    6438 ECB             : 
  742 peter                    6439 CBC          12 :                 if (!is_valid_unicode_codepoint(unicode))
  742 peter                    6440 GIC           3 :                     ereport(ERROR,
                               6441                 :                             errcode(ERRCODE_INVALID_PARAMETER_VALUE),
  742 peter                    6442 ECB             :                             errmsg("invalid Unicode code point: %04X", unicode));
                               6443                 : 
  742 peter                    6444 GIC           9 :                 if (pair_first)
  742 peter                    6445 ECB             :                 {
  742 peter                    6446 CBC           3 :                     if (is_utf16_surrogate_second(unicode))
                               6447                 :                     {
  742 peter                    6448 UIC           0 :                         unicode = surrogate_pair_to_codepoint(pair_first, unicode);
  742 peter                    6449 LBC           0 :                         pair_first = 0;
                               6450                 :                     }
  742 peter                    6451 ECB             :                     else
  742 peter                    6452 CBC           3 :                         goto invalid_pair;
                               6453                 :                 }
  742 peter                    6454 GIC           6 :                 else if (is_utf16_surrogate_second(unicode))
  742 peter                    6455 UIC           0 :                     goto invalid_pair;
  742 peter                    6456 ECB             : 
  742 peter                    6457 GIC           6 :                 if (is_utf16_surrogate_first(unicode))
  742 peter                    6458 CBC           3 :                     pair_first = unicode;
                               6459                 :                 else
  742 peter                    6460 EUB             :                 {
  742 peter                    6461 GBC           3 :                     pg_unicode_to_server(unicode, (unsigned char *) cbuf);
  742 peter                    6462 GIC           3 :                     appendStringInfoString(&str, cbuf);
                               6463                 :                 }
  742 peter                    6464 ECB             : 
  742 peter                    6465 GIC           6 :                 instr += 8;
  742 peter                    6466 CBC           6 :                 len -= 8;
  742 peter                    6467 EUB             :             }
  742 peter                    6468 GIC          15 :             else if (len >= 10 && instr[1] == 'U' && isxdigits_n(instr + 2, 8))
  742 peter                    6469 CBC           6 :             {
  742 peter                    6470 ECB             :                 pg_wchar    unicode;
                               6471                 : 
  742 peter                    6472 GIC          12 :                 unicode = hexval_n(instr + 2, 8);
  742 peter                    6473 ECB             : 
  742 peter                    6474 CBC          12 :                 if (!is_valid_unicode_codepoint(unicode))
  742 peter                    6475 GIC           3 :                     ereport(ERROR,
                               6476                 :                             errcode(ERRCODE_INVALID_PARAMETER_VALUE),
  742 peter                    6477 ECB             :                             errmsg("invalid Unicode code point: %04X", unicode));
                               6478                 : 
  742 peter                    6479 GIC           9 :                 if (pair_first)
                               6480                 :                 {
  742 peter                    6481 CBC           3 :                     if (is_utf16_surrogate_second(unicode))
                               6482                 :                     {
  742 peter                    6483 UIC           0 :                         unicode = surrogate_pair_to_codepoint(pair_first, unicode);
                               6484               0 :                         pair_first = 0;
                               6485                 :                     }
                               6486                 :                     else
  742 peter                    6487 GIC           3 :                         goto invalid_pair;
  742 peter                    6488 ECB             :                 }
  742 peter                    6489 GBC           6 :                 else if (is_utf16_surrogate_second(unicode))
  742 peter                    6490 UIC           0 :                     goto invalid_pair;
  742 peter                    6491 ECB             : 
  742 peter                    6492 CBC           6 :                 if (is_utf16_surrogate_first(unicode))
  742 peter                    6493 GIC           3 :                     pair_first = unicode;
                               6494                 :                 else
                               6495                 :                 {
                               6496               3 :                     pg_unicode_to_server(unicode, (unsigned char *) cbuf);
  742 peter                    6497 CBC           3 :                     appendStringInfoString(&str, cbuf);
  742 peter                    6498 ECB             :                 }
                               6499                 : 
  742 peter                    6500 CBC           6 :                 instr += 10;
                               6501               6 :                 len -= 10;
                               6502                 :             }
  742 peter                    6503 ECB             :             else
  742 peter                    6504 GIC           3 :                 ereport(ERROR,
  742 peter                    6505 ECB             :                         (errcode(ERRCODE_SYNTAX_ERROR),
                               6506                 :                          errmsg("invalid Unicode escape"),
                               6507                 :                          errhint("Unicode escapes must be \\XXXX, \\+XXXXXX, \\uXXXX, or \\UXXXXXXXX.")));
                               6508                 :         }
                               6509                 :         else
                               6510                 :         {
  742 peter                    6511 GIC         192 :             if (pair_first)
  742 peter                    6512 UIC           0 :                 goto invalid_pair;
                               6513                 : 
  742 peter                    6514 GIC         192 :             appendStringInfoChar(&str, *instr++);
                               6515             192 :             len--;
                               6516                 :         }
                               6517                 :     }
                               6518                 : 
                               6519                 :     /* unfinished surrogate pair? */
                               6520              12 :     if (pair_first)
                               6521               3 :         goto invalid_pair;
                               6522                 : 
                               6523               9 :     result = cstring_to_text_with_len(str.data, str.len);
                               6524               9 :     pfree(str.data);
                               6525                 : 
                               6526               9 :     PG_RETURN_TEXT_P(result);
                               6527                 : 
                               6528              15 : invalid_pair:
                               6529              15 :     ereport(ERROR,
                               6530                 :             (errcode(ERRCODE_SYNTAX_ERROR),
                               6531                 :              errmsg("invalid Unicode surrogate pair")));
                               6532                 :     PG_RETURN_NULL();           /* keep compiler quiet */
                               6533                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a