LCOV - differential code coverage report
Current view: top level - src/backend/utils/adt - like.c (source / functions) Coverage Total Hit LBC UIC UBC GIC GNC CBC EUB ECB
Current: Differential Code Coverage HEAD vs 15 Lines: 87.8 % 139 122 3 8 6 75 2 45 11 74
Current Date: 2023-04-08 15:15:32 Functions: 93.8 % 16 15 1 15 1 15
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * like.c
       4                 :  *    like expression handling code.
       5                 :  *
       6                 :  *   NOTES
       7                 :  *      A big hack of the regexp.c code!! Contributed by
       8                 :  *      Keith Parks <emkxp01@mtcc.demon.co.uk> (7/95).
       9                 :  *
      10                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
      11                 :  * Portions Copyright (c) 1994, Regents of the University of California
      12                 :  *
      13                 :  * IDENTIFICATION
      14                 :  *  src/backend/utils/adt/like.c
      15                 :  *
      16                 :  *-------------------------------------------------------------------------
      17                 :  */
      18                 : #include "postgres.h"
      19                 : 
      20                 : #include <ctype.h>
      21                 : 
      22                 : #include "catalog/pg_collation.h"
      23                 : #include "mb/pg_wchar.h"
      24                 : #include "miscadmin.h"
      25                 : #include "utils/builtins.h"
      26                 : #include "utils/pg_locale.h"
      27                 : #include "varatt.h"
      28                 : 
      29                 : 
      30                 : #define LIKE_TRUE                       1
      31                 : #define LIKE_FALSE                      0
      32                 : #define LIKE_ABORT                      (-1)
      33                 : 
      34                 : 
      35                 : static int  SB_MatchText(const char *t, int tlen, const char *p, int plen,
      36                 :                          pg_locale_t locale, bool locale_is_c);
      37                 : static text *SB_do_like_escape(text *pat, text *esc);
      38                 : 
      39                 : static int  MB_MatchText(const char *t, int tlen, const char *p, int plen,
      40                 :                          pg_locale_t locale, bool locale_is_c);
      41                 : static text *MB_do_like_escape(text *pat, text *esc);
      42                 : 
      43                 : static int  UTF8_MatchText(const char *t, int tlen, const char *p, int plen,
      44                 :                            pg_locale_t locale, bool locale_is_c);
      45                 : 
      46                 : static int  SB_IMatchText(const char *t, int tlen, const char *p, int plen,
      47                 :                           pg_locale_t locale, bool locale_is_c);
      48                 : 
      49                 : static int  GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation);
      50                 : static int  Generic_Text_IC_like(text *str, text *pat, Oid collation);
      51                 : 
      52                 : /*--------------------
      53                 :  * Support routine for MatchText. Compares given multibyte streams
      54                 :  * as wide characters. If they match, returns 1 otherwise returns 0.
      55                 :  *--------------------
      56                 :  */
      57                 : static inline int
      58 GIC         462 : wchareq(const char *p1, const char *p2)
      59 ECB             : {
      60                 :     int         p1_len;
      61                 : 
      62                 :     /* Optimization:  quickly compare the first byte. */
      63 GIC         462 :     if (*p1 != *p2)
      64 CBC         348 :         return 0;
      65 ECB             : 
      66 GIC         114 :     p1_len = pg_mblen(p1);
      67 CBC         114 :     if (pg_mblen(p2) != p1_len)
      68 LBC           0 :         return 0;
      69 EUB             : 
      70                 :     /* They are the same length */
      71 GIC         228 :     while (p1_len--)
      72 ECB             :     {
      73 GIC         114 :         if (*p1++ != *p2++)
      74 LBC           0 :             return 0;
      75 EUB             :     }
      76 GIC         114 :     return 1;
      77 ECB             : }
      78                 : 
      79                 : /*
      80                 :  * Formerly we had a routine iwchareq() here that tried to do case-insensitive
      81                 :  * comparison of multibyte characters.  It did not work at all, however,
      82                 :  * because it relied on tolower() which has a single-byte API ... and
      83                 :  * towlower() wouldn't be much better since we have no suitably cheap way
      84                 :  * of getting a single character transformed to the system's wchar_t format.
      85                 :  * So now, we just downcase the strings using lower() and apply regular LIKE
      86                 :  * comparison.  This should be revisited when we install better locale support.
      87                 :  */
      88                 : 
      89                 : /*
      90                 :  * We do handle case-insensitive matching for single-byte encodings using
      91                 :  * fold-on-the-fly processing, however.
      92                 :  */
      93                 : static char
      94 UIC           0 : SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
      95 EUB             : {
      96 UIC           0 :     if (locale_is_c)
      97 UBC           0 :         return pg_ascii_tolower(c);
      98 EUB             : #ifdef HAVE_LOCALE_T
      99 UIC           0 :     else if (locale)
     100 UBC           0 :         return tolower_l(c, locale->info.lt);
     101 EUB             : #endif
     102                 :     else
     103 UIC           0 :         return pg_tolower(c);
     104 EUB             : }
     105                 : 
     106                 : 
     107                 : #define NextByte(p, plen)   ((p)++, (plen)--)
     108                 : 
     109                 : /* Set up to compile like_match.c for multibyte characters */
     110                 : #define CHAREQ(p1, p2) wchareq((p1), (p2))
     111                 : #define NextChar(p, plen) \
     112                 :     do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0)
     113                 : #define CopyAdvChar(dst, src, srclen) \
     114                 :     do { int __l = pg_mblen(src); \
     115                 :          (srclen) -= __l; \
     116                 :          while (__l-- > 0) \
     117                 :              *(dst)++ = *(src)++; \
     118                 :        } while (0)
     119                 : 
     120                 : #define MatchText   MB_MatchText
     121                 : #define do_like_escape  MB_do_like_escape
     122                 : 
     123                 : #include "like_match.c"
     124                 : 
     125                 : /* Set up to compile like_match.c for single-byte characters */
     126                 : #define CHAREQ(p1, p2) (*(p1) == *(p2))
     127                 : #define NextChar(p, plen) NextByte((p), (plen))
     128                 : #define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)
     129                 : 
     130                 : #define MatchText   SB_MatchText
     131                 : #define do_like_escape  SB_do_like_escape
     132                 : 
     133                 : #include "like_match.c"
     134                 : 
     135                 : /* setup to compile like_match.c for single byte case insensitive matches */
     136                 : #define MATCH_LOWER(t) SB_lower_char((unsigned char) (t), locale, locale_is_c)
     137                 : #define NextChar(p, plen) NextByte((p), (plen))
     138                 : #define MatchText SB_IMatchText
     139                 : 
     140                 : #include "like_match.c"
     141                 : 
     142                 : /* setup to compile like_match.c for UTF8 encoding, using fast NextChar */
     143                 : 
     144                 : #define NextChar(p, plen) \
     145                 :     do { (p)++; (plen)--; } while ((plen) > 0 && (*(p) & 0xC0) == 0x80 )
     146                 : #define MatchText   UTF8_MatchText
     147                 : 
     148                 : #include "like_match.c"
     149                 : 
     150                 : /* Generic for all cases not requiring inline case-folding */
     151                 : static inline int
     152 GIC     1411360 : GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation)
     153 ECB             : {
     154 GIC     1411360 :     if (collation && !lc_ctype_is_c(collation))
     155 ECB             :     {
     156 GIC       89009 :         pg_locale_t locale = pg_newlocale_from_collation(collation);
     157 ECB             : 
     158 GNC       89009 :         if (!pg_locale_deterministic(locale))
     159 CBC          12 :             ereport(ERROR,
     160 ECB             :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     161                 :                      errmsg("nondeterministic collations are not supported for LIKE")));
     162                 :     }
     163                 : 
     164 GIC     1411348 :     if (pg_database_encoding_max_length() == 1)
     165 CBC       21384 :         return SB_MatchText(s, slen, p, plen, 0, true);
     166         1389964 :     else if (GetDatabaseEncoding() == PG_UTF8)
     167         1389964 :         return UTF8_MatchText(s, slen, p, plen, 0, true);
     168 ECB             :     else
     169 UIC           0 :         return MB_MatchText(s, slen, p, plen, 0, true);
     170 EUB             : }
     171                 : 
     172                 : static inline int
     173 GIC       45760 : Generic_Text_IC_like(text *str, text *pat, Oid collation)
     174 ECB             : {
     175                 :     char       *s,
     176                 :                *p;
     177                 :     int         slen,
     178                 :                 plen;
     179 GIC       45760 :     pg_locale_t locale = 0;
     180 CBC       45760 :     bool        locale_is_c = false;
     181 ECB             : 
     182 GIC       45760 :     if (!OidIsValid(collation))
     183 ECB             :     {
     184                 :         /*
     185                 :          * This typically means that the parser could not resolve a conflict
     186                 :          * of implicit collations, so report it that way.
     187                 :          */
     188 UIC           0 :         ereport(ERROR,
     189 EUB             :                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
     190                 :                  errmsg("could not determine which collation to use for ILIKE"),
     191                 :                  errhint("Use the COLLATE clause to set the collation explicitly.")));
     192                 :     }
     193                 : 
     194 GIC       45760 :     if (lc_ctype_is_c(collation))
     195 CBC       11455 :         locale_is_c = true;
     196 ECB             :     else
     197 GIC       34305 :         locale = pg_newlocale_from_collation(collation);
     198 ECB             : 
     199 GNC       45760 :     if (!pg_locale_deterministic(locale))
     200 CBC           6 :         ereport(ERROR,
     201 ECB             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     202                 :                  errmsg("nondeterministic collations are not supported for ILIKE")));
     203                 : 
     204                 :     /*
     205                 :      * For efficiency reasons, in the single byte case we don't call lower()
     206                 :      * on the pattern and text, but instead call SB_lower_char on each
     207                 :      * character.  In the multi-byte case we don't have much choice :-(. Also,
     208                 :      * ICU does not support single-character case folding, so we go the long
     209                 :      * way.
     210                 :      */
     211                 : 
     212 GIC       45754 :     if (pg_database_encoding_max_length() > 1 || (locale && locale->provider == COLLPROVIDER_ICU))
     213 ECB             :     {
     214 GIC       45754 :         pat = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
     215 ECB             :                                                      PointerGetDatum(pat)));
     216 GIC       45754 :         p = VARDATA_ANY(pat);
     217 CBC       45754 :         plen = VARSIZE_ANY_EXHDR(pat);
     218           45754 :         str = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
     219 ECB             :                                                      PointerGetDatum(str)));
     220 GIC       45754 :         s = VARDATA_ANY(str);
     221 CBC       45754 :         slen = VARSIZE_ANY_EXHDR(str);
     222           45754 :         if (GetDatabaseEncoding() == PG_UTF8)
     223           45754 :             return UTF8_MatchText(s, slen, p, plen, 0, true);
     224 ECB             :         else
     225 UIC           0 :             return MB_MatchText(s, slen, p, plen, 0, true);
     226 EUB             :     }
     227                 :     else
     228                 :     {
     229 UIC           0 :         p = VARDATA_ANY(pat);
     230 UBC           0 :         plen = VARSIZE_ANY_EXHDR(pat);
     231               0 :         s = VARDATA_ANY(str);
     232               0 :         slen = VARSIZE_ANY_EXHDR(str);
     233               0 :         return SB_IMatchText(s, slen, p, plen, locale, locale_is_c);
     234 EUB             :     }
     235                 : }
     236                 : 
     237                 : /*
     238                 :  *  interface routines called by the function manager
     239                 :  */
     240                 : 
     241                 : Datum
     242 GIC       81542 : namelike(PG_FUNCTION_ARGS)
     243 ECB             : {
     244 GIC       81542 :     Name        str = PG_GETARG_NAME(0);
     245 CBC       81542 :     text       *pat = PG_GETARG_TEXT_PP(1);
     246 ECB             :     bool        result;
     247                 :     char       *s,
     248                 :                *p;
     249                 :     int         slen,
     250                 :                 plen;
     251                 : 
     252 GIC       81542 :     s = NameStr(*str);
     253 CBC       81542 :     slen = strlen(s);
     254           81542 :     p = VARDATA_ANY(pat);
     255           81542 :     plen = VARSIZE_ANY_EXHDR(pat);
     256 ECB             : 
     257 GIC       81542 :     result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE);
     258 ECB             : 
     259 GIC       81542 :     PG_RETURN_BOOL(result);
     260 ECB             : }
     261                 : 
     262                 : Datum
     263 GIC        2649 : namenlike(PG_FUNCTION_ARGS)
     264 ECB             : {
     265 GIC        2649 :     Name        str = PG_GETARG_NAME(0);
     266 CBC        2649 :     text       *pat = PG_GETARG_TEXT_PP(1);
     267 ECB             :     bool        result;
     268                 :     char       *s,
     269                 :                *p;
     270                 :     int         slen,
     271                 :                 plen;
     272                 : 
     273 GIC        2649 :     s = NameStr(*str);
     274 CBC        2649 :     slen = strlen(s);
     275            2649 :     p = VARDATA_ANY(pat);
     276            2649 :     plen = VARSIZE_ANY_EXHDR(pat);
     277 ECB             : 
     278 GIC        2649 :     result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE);
     279 ECB             : 
     280 GIC        2649 :     PG_RETURN_BOOL(result);
     281 ECB             : }
     282                 : 
     283                 : Datum
     284 GIC     1171131 : textlike(PG_FUNCTION_ARGS)
     285 ECB             : {
     286 GIC     1171131 :     text       *str = PG_GETARG_TEXT_PP(0);
     287 CBC     1171131 :     text       *pat = PG_GETARG_TEXT_PP(1);
     288 ECB             :     bool        result;
     289                 :     char       *s,
     290                 :                *p;
     291                 :     int         slen,
     292                 :                 plen;
     293                 : 
     294 GIC     1171131 :     s = VARDATA_ANY(str);
     295 CBC     1171131 :     slen = VARSIZE_ANY_EXHDR(str);
     296         1171131 :     p = VARDATA_ANY(pat);
     297         1171131 :     plen = VARSIZE_ANY_EXHDR(pat);
     298 ECB             : 
     299 GIC     1171131 :     result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE);
     300 ECB             : 
     301 GIC     1171119 :     PG_RETURN_BOOL(result);
     302 ECB             : }
     303                 : 
     304                 : Datum
     305 GIC      156038 : textnlike(PG_FUNCTION_ARGS)
     306 ECB             : {
     307 GIC      156038 :     text       *str = PG_GETARG_TEXT_PP(0);
     308 CBC      156038 :     text       *pat = PG_GETARG_TEXT_PP(1);
     309 ECB             :     bool        result;
     310                 :     char       *s,
     311                 :                *p;
     312                 :     int         slen,
     313                 :                 plen;
     314                 : 
     315 GIC      156038 :     s = VARDATA_ANY(str);
     316 CBC      156038 :     slen = VARSIZE_ANY_EXHDR(str);
     317          156038 :     p = VARDATA_ANY(pat);
     318          156038 :     plen = VARSIZE_ANY_EXHDR(pat);
     319 ECB             : 
     320 GIC      156038 :     result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE);
     321 ECB             : 
     322 GIC      156038 :     PG_RETURN_BOOL(result);
     323 ECB             : }
     324                 : 
     325                 : Datum
     326 GIC           6 : bytealike(PG_FUNCTION_ARGS)
     327 ECB             : {
     328 GIC           6 :     bytea      *str = PG_GETARG_BYTEA_PP(0);
     329 CBC           6 :     bytea      *pat = PG_GETARG_BYTEA_PP(1);
     330 ECB             :     bool        result;
     331                 :     char       *s,
     332                 :                *p;
     333                 :     int         slen,
     334                 :                 plen;
     335                 : 
     336 GIC           6 :     s = VARDATA_ANY(str);
     337 CBC           6 :     slen = VARSIZE_ANY_EXHDR(str);
     338               6 :     p = VARDATA_ANY(pat);
     339               6 :     plen = VARSIZE_ANY_EXHDR(pat);
     340 ECB             : 
     341 GIC           6 :     result = (SB_MatchText(s, slen, p, plen, 0, true) == LIKE_TRUE);
     342 ECB             : 
     343 GIC           6 :     PG_RETURN_BOOL(result);
     344 ECB             : }
     345                 : 
     346                 : Datum
     347 GIC           6 : byteanlike(PG_FUNCTION_ARGS)
     348 ECB             : {
     349 GIC           6 :     bytea      *str = PG_GETARG_BYTEA_PP(0);
     350 CBC           6 :     bytea      *pat = PG_GETARG_BYTEA_PP(1);
     351 ECB             :     bool        result;
     352                 :     char       *s,
     353                 :                *p;
     354                 :     int         slen,
     355                 :                 plen;
     356                 : 
     357 GIC           6 :     s = VARDATA_ANY(str);
     358 CBC           6 :     slen = VARSIZE_ANY_EXHDR(str);
     359               6 :     p = VARDATA_ANY(pat);
     360               6 :     plen = VARSIZE_ANY_EXHDR(pat);
     361 ECB             : 
     362 GIC           6 :     result = (SB_MatchText(s, slen, p, plen, 0, true) != LIKE_TRUE);
     363 ECB             : 
     364 GIC           6 :     PG_RETURN_BOOL(result);
     365 ECB             : }
     366                 : 
     367                 : /*
     368                 :  * Case-insensitive versions
     369                 :  */
     370                 : 
     371                 : Datum
     372 GIC       11448 : nameiclike(PG_FUNCTION_ARGS)
     373 ECB             : {
     374 GIC       11448 :     Name        str = PG_GETARG_NAME(0);
     375 CBC       11448 :     text       *pat = PG_GETARG_TEXT_PP(1);
     376 ECB             :     bool        result;
     377                 :     text       *strtext;
     378                 : 
     379 GIC       11448 :     strtext = DatumGetTextPP(DirectFunctionCall1(name_text,
     380 ECB             :                                                  NameGetDatum(str)));
     381 GIC       11448 :     result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) == LIKE_TRUE);
     382 ECB             : 
     383 GIC       11448 :     PG_RETURN_BOOL(result);
     384 ECB             : }
     385                 : 
     386                 : Datum
     387 GIC           3 : nameicnlike(PG_FUNCTION_ARGS)
     388 ECB             : {
     389 GIC           3 :     Name        str = PG_GETARG_NAME(0);
     390 CBC           3 :     text       *pat = PG_GETARG_TEXT_PP(1);
     391 ECB             :     bool        result;
     392                 :     text       *strtext;
     393                 : 
     394 GIC           3 :     strtext = DatumGetTextPP(DirectFunctionCall1(name_text,
     395 ECB             :                                                  NameGetDatum(str)));
     396 GIC           3 :     result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) != LIKE_TRUE);
     397 ECB             : 
     398 GIC           3 :     PG_RETURN_BOOL(result);
     399 ECB             : }
     400                 : 
     401                 : Datum
     402 GIC       34281 : texticlike(PG_FUNCTION_ARGS)
     403 ECB             : {
     404 GIC       34281 :     text       *str = PG_GETARG_TEXT_PP(0);
     405 CBC       34281 :     text       *pat = PG_GETARG_TEXT_PP(1);
     406 ECB             :     bool        result;
     407                 : 
     408 GIC       34281 :     result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) == LIKE_TRUE);
     409 ECB             : 
     410 GIC       34275 :     PG_RETURN_BOOL(result);
     411 ECB             : }
     412                 : 
     413                 : Datum
     414 GIC          28 : texticnlike(PG_FUNCTION_ARGS)
     415 ECB             : {
     416 GIC          28 :     text       *str = PG_GETARG_TEXT_PP(0);
     417 CBC          28 :     text       *pat = PG_GETARG_TEXT_PP(1);
     418 ECB             :     bool        result;
     419                 : 
     420 GIC          28 :     result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) != LIKE_TRUE);
     421 ECB             : 
     422 GIC          28 :     PG_RETURN_BOOL(result);
     423 ECB             : }
     424                 : 
     425                 : /*
     426                 :  * like_escape() --- given a pattern and an ESCAPE string,
     427                 :  * convert the pattern to use Postgres' standard backslash escape convention.
     428                 :  */
     429                 : Datum
     430 GIC         106 : like_escape(PG_FUNCTION_ARGS)
     431 ECB             : {
     432 GIC         106 :     text       *pat = PG_GETARG_TEXT_PP(0);
     433 CBC         106 :     text       *esc = PG_GETARG_TEXT_PP(1);
     434 ECB             :     text       *result;
     435                 : 
     436 GIC         106 :     if (pg_database_encoding_max_length() == 1)
     437 LBC           0 :         result = SB_do_like_escape(pat, esc);
     438 EUB             :     else
     439 GIC         106 :         result = MB_do_like_escape(pat, esc);
     440 ECB             : 
     441 GIC         106 :     PG_RETURN_TEXT_P(result);
     442 ECB             : }
     443                 : 
     444                 : /*
     445                 :  * like_escape_bytea() --- given a pattern and an ESCAPE string,
     446                 :  * convert the pattern to use Postgres' standard backslash escape convention.
     447                 :  */
     448                 : Datum
     449 GIC           6 : like_escape_bytea(PG_FUNCTION_ARGS)
     450 ECB             : {
     451 GIC           6 :     bytea      *pat = PG_GETARG_BYTEA_PP(0);
     452 CBC           6 :     bytea      *esc = PG_GETARG_BYTEA_PP(1);
     453               6 :     bytea      *result = SB_do_like_escape((text *) pat, (text *) esc);
     454 ECB             : 
     455 GIC           6 :     PG_RETURN_BYTEA_P((bytea *) result);
     456 ECB             : }
        

Generated by: LCOV version v1.16-55-g56c0a2a